[llvm] r175619 - I optimized the following patterns:
Muhammad Tauqir Ahmad
muhammad.t.ahmad at intel.com
Mon Feb 25 09:01:27 PST 2013
Hi Elena,
I have another question:
For sext 4xi8 -> 4xi64, the sequence pasted below is produced on
-mcpu=corei7-avx.
Is there a reason the sequence vpmovzxbd+vpslld+vpsrad is produced
instead of just a vpmovsxbd? Maybe I am missing something.
vpmovzxbd (%rax), %xmm0
vpslld $24, %xmm0, %xmm0
vpsrad $24, %xmm0, %xmm1
vpmovsxdq %xmm1, %xmm0
vmovhlps %xmm1, %xmm1, %xmm1 # xmm1 = xmm1[1,1]
vpmovsxdq %xmm1, %xmm1
vinsertf128 $1, %xmm1, %ymm0, %ymm0
vmovaps %ymm0, (%rax)
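For what it's worth, the vpslld $24 / vpsrad $24 pair is the usual
shift trick for sign_extend_inreg of an i8 inside an i32 lane. A
scalar C++ analogue, my own sketch rather than anything from the
patch:

  #include <cassert>
  #include <cstdint>

  // Sign-extend the low 8 bits of a 32-bit lane: shift the byte up
  // to the top, then arithmetic-shift it back down so the sign bit
  // smears across the upper 24 bits.
  int32_t sext_in_reg_i8(int32_t lane) {
    return (int32_t)((uint32_t)lane << 24) >> 24;
  }

  int main() {
    assert(sext_in_reg_i8(0xFF) == -1);  // 0xFF is -1 as an i8
    assert(sext_in_reg_i8(0x7F) == 127); // positive bytes unchanged
    return 0;
  }

That explains the shifts themselves; the question above is why the
leading zero extend is not folded into a single vpmovsxbd.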
(sorry for two emails, forgot to reply-all)
- Muhammad Tauqir
On Wed, Feb 20, 2013 at 7:42 AM, Elena Demikhovsky
<elena.demikhovsky at intel.com> wrote:
> Author: delena
> Date: Wed Feb 20 06:42:54 2013
> New Revision: 175619
>
> URL: http://llvm.org/viewvc/llvm-project?rev=175619&view=rev
> Log:
> I optimized the following patterns:
> sext <4 x i1> to <4 x i64>
> sext <4 x i8> to <4 x i64>
> sext <4 x i16> to <4 x i64>
>
> I'm running a combine on SIGN_EXTEND_INREG and rewriting it back into SEXT patterns:
> (sext_in_reg (v4i64 anyext (v4i32 x )), ExtraVT) -> (v4i64 sext (v4i32 sext_in_reg (v4i32 x , ExtraVT)))
>
> The sext_in_reg (v4i32 x) may be lowered to shl+sar operations.
> There is no vector "sar" on 64-bit elements, so sext_in_reg (v4i64 x) has no vector lowering.
>
> I also added the costs of these operations to the AVX cost table.
>
> Modified:
> llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
> llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp
> llvm/trunk/test/Analysis/CostModel/X86/cast.ll
> llvm/trunk/test/CodeGen/X86/avx-sext.ll
>
> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=175619&r1=175618&r2=175619&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed Feb 20 06:42:54 2013
> @@ -1323,6 +1323,7 @@ X86TargetLowering::X86TargetLowering(X86
> setTargetDAGCombine(ISD::ZERO_EXTEND);
> setTargetDAGCombine(ISD::ANY_EXTEND);
> setTargetDAGCombine(ISD::SIGN_EXTEND);
> + setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
> setTargetDAGCombine(ISD::TRUNCATE);
> setTargetDAGCombine(ISD::SINT_TO_FP);
> setTargetDAGCombine(ISD::SETCC);
> @@ -17076,6 +17077,41 @@ static SDValue PerformVZEXT_MOVLCombine(
> return SDValue();
> }
>
> +static SDValue PerformSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG,
> + const X86Subtarget *Subtarget) {
> + EVT VT = N->getValueType(0);
> + if (!VT.isVector())
> + return SDValue();
> +
> + SDValue N0 = N->getOperand(0);
> + SDValue N1 = N->getOperand(1);
> + EVT ExtraVT = cast<VTSDNode>(N1)->getVT();
> + DebugLoc dl = N->getDebugLoc();
> +
> + // SIGN_EXTEND_INREG to v4i64 is an expensive operation on both
> + // SSE and AVX2 since there is no arithmetic shift right
> + // operation on a vector with 64-bit elements.
> + //(sext_in_reg (v4i64 anyext (v4i32 x )), ExtraVT) ->
> + // (v4i64 sext (v4i32 sext_in_reg (v4i32 x , ExtraVT)))
> + if (VT == MVT::v4i64 && (N0.getOpcode() == ISD::ANY_EXTEND ||
> + N0.getOpcode() == ISD::SIGN_EXTEND)) {
> + SDValue N00 = N0.getOperand(0);
> +
> + // On AVX2 an EXTLOAD has a better solution:
> + // it may be replaced with an X86ISD::VSEXT node.
> + if (N00.getOpcode() == ISD::LOAD && Subtarget->hasInt256())
> + if (!ISD::isNormalLoad(N00.getNode()))
> + return SDValue();
> +
> + if (N00.getValueType() == MVT::v4i32 && ExtraVT.getSizeInBits() < 128) {
> + SDValue Tmp = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32,
> + N00, N1);
> + return DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i64, Tmp);
> + }
> + }
> + return SDValue();
> +}
> +
> static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
> TargetLowering::DAGCombinerInfo &DCI,
> const X86Subtarget *Subtarget) {
> @@ -17468,6 +17504,7 @@ SDValue X86TargetLowering::PerformDAGCom
> case ISD::ANY_EXTEND:
> case ISD::ZERO_EXTEND: return PerformZExtCombine(N, DAG, DCI, Subtarget);
> case ISD::SIGN_EXTEND: return PerformSExtCombine(N, DAG, DCI, Subtarget);
> + case ISD::SIGN_EXTEND_INREG: return PerformSIGN_EXTEND_INREGCombine(N, DAG, Subtarget);
> case ISD::TRUNCATE: return PerformTruncateCombine(N, DAG,DCI,Subtarget);
> case ISD::SETCC: return PerformISDSETCCCombine(N, DAG);
> case X86ISD::SETCC: return PerformSETCCCombine(N, DAG, DCI, Subtarget);
>
> Modified: llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp?rev=175619&r1=175618&r2=175619&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp Wed Feb 20 06:42:54 2013
> @@ -232,6 +232,9 @@ unsigned X86TTI::getCastInstrCost(unsign
> { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 1 },
> { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 6 },
> { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 9 },
> + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 8 },
> + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 8 },
> + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 8 },
> { ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 3 },
> };
>
>
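The three new rows follow the existing cost-table pattern: a static
array of {opcode, dst type, src type, cost} entries searched
linearly, falling back to the generic estimate on a miss. A sketch
of that idea in isolation, with illustrative names rather than
LLVM's actual helpers:

  #include <cstddef>

  struct CastCostEntry { int ISD; int Dst; int Src; unsigned Cost; };

  // Returns the index of the matching entry, or -1 when absent, in
  // which case the caller falls back to the generic cost.
  int findCastCost(const CastCostEntry *Tbl, size_t Size,
                   int ISD, int Dst, int Src) {
    for (size_t I = 0; I != Size; ++I)
      if (Tbl[I].ISD == ISD && Tbl[I].Dst == Dst && Tbl[I].Src == Src)
        return (int)I;
    return -1;
  }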
> Modified: llvm/trunk/test/Analysis/CostModel/X86/cast.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/CostModel/X86/cast.ll?rev=175619&r1=175618&r2=175619&view=diff
> ==============================================================================
> --- llvm/trunk/test/Analysis/CostModel/X86/cast.ll (original)
> +++ llvm/trunk/test/Analysis/CostModel/X86/cast.ll Wed Feb 20 06:42:54 2013
> @@ -44,6 +44,10 @@ define i32 @zext_sext(<8 x i1> %in) {
> %B = zext <8 x i16> undef to <8 x i32>
> ;CHECK: cost of 1 {{.*}} sext
> %C = sext <4 x i32> undef to <4 x i64>
> + ;CHECK: cost of 8 {{.*}} sext
> + %C1 = sext <4 x i8> undef to <4 x i64>
> + ;CHECK: cost of 8 {{.*}} sext
> + %C2 = sext <4 x i16> undef to <4 x i64>
>
> ;CHECK: cost of 1 {{.*}} zext
> %D = zext <4 x i32> undef to <4 x i64>
> @@ -59,7 +63,7 @@ define i32 @zext_sext(<8 x i1> %in) {
> ret i32 undef
> }
>
> -define i32 @masks(<8 x i1> %in) {
> +define i32 @masks8(<8 x i1> %in) {
> ;CHECK: cost of 6 {{.*}} zext
> %Z = zext <8 x i1> %in to <8 x i32>
> ;CHECK: cost of 9 {{.*}} sext
> @@ -67,3 +71,9 @@ define i32 @masks(<8 x i1> %in) {
> ret i32 undef
> }
>
> +define i32 @masks4(<4 x i1> %in) {
> + ;CHECK: cost of 8 {{.*}} sext
> + %S = sext <4 x i1> %in to <4 x i64>
> + ret i32 undef
> +}
> +
>
> Modified: llvm/trunk/test/CodeGen/X86/avx-sext.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-sext.ll?rev=175619&r1=175618&r2=175619&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/avx-sext.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/avx-sext.ll Wed Feb 20 06:42:54 2013
> @@ -142,3 +142,26 @@ define <8 x i16> @load_sext_test6(<8 x i
> %Y = sext <8 x i8> %X to <8 x i16>
> ret <8 x i16>%Y
> }
> +
> +; AVX: sext_4i1_to_4i64
> +; AVX: vpslld $31
> +; AVX: vpsrad $31
> +; AVX: vpmovsxdq
> +; AVX: vpmovsxdq
> +; AVX: ret
> +define <4 x i64> @sext_4i1_to_4i64(<4 x i1> %mask) {
> + %extmask = sext <4 x i1> %mask to <4 x i64>
> + ret <4 x i64> %extmask
> +}
> +
> +; AVX: sext_4i8_to_4i64
> +; AVX: vpslld $24
> +; AVX: vpsrad $24
> +; AVX: vpmovsxdq
> +; AVX: vpmovsxdq
> +; AVX: ret
> +define <4 x i64> @sext_4i8_to_4i64(<4 x i8> %mask) {
> + %extmask = sext <4 x i8> %mask to <4 x i64>
> + ret <4 x i64> %extmask
> +}
> +
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits