[PATCH] X86: deduplicate V[SZ]EXT_MOVL and V[SZ]EXT nodes

Nadav Rotem nrotem at apple.com
Wed Feb 5 09:11:02 PST 2014


It looks correct to me. 

On Feb 5, 2014, at 6:54 AM, Tim Northover <t.p.northover at gmail.com> wrote:

> Hi all,
> 
> I noticed that there seemed to be some confusion with the various vector extension nodes in the X86 backend.
> 
> I believe VZEXT_MOVL means "zero all vector elements except the first" (and should have identical input & output types) whereas VZEXT means "zero extend each element of a vector (discarding higher elements if necessary)".
> 
> For example:
>        (v4i32 (vzext (v16i8 ...)))
> 
> should zero extend the low 4 bytes of the incoming vector to 32 bits, discarding the higher bytes.
> 
> However, at some point in the past these two concepts became confused, even leading to a nonsensical VSEXT_MOVL.
> 
> This patch should put things back to something sensible, removing VSEXT_MOVL entirely and pointing the various uses of VZEXT_MOVL at the correct version. As a result some patterns become redundant and have been removed.
> 
> Have I interpreted the backend correctly? It's not one I know very well.
> 
> Cheers.
> 
> Tim.
> 
> http://llvm-reviews.chandlerc.com/D2700
> 
> Files:
>  lib/Target/X86/X86ISelLowering.cpp
>  lib/Target/X86/X86ISelLowering.h
>  lib/Target/X86/X86InstrFragmentsSIMD.td
>  lib/Target/X86/X86InstrSSE.td
> 
> Index: lib/Target/X86/X86ISelLowering.cpp
> ===================================================================
> --- lib/Target/X86/X86ISelLowering.cpp
> +++ lib/Target/X86/X86ISelLowering.cpp
> @@ -9023,7 +9023,7 @@
>     return SDValue();
> 
>   if (Subtarget->hasInt256())
> -    return DAG.getNode(X86ISD::VZEXT_MOVL, dl, VT, In);
> +    return DAG.getNode(X86ISD::VZEXT, dl, VT, In);
> 
>   SDValue ZeroVec = getZeroVector(InVT, Subtarget, DAG, dl);
>   SDValue Undef = DAG.getUNDEF(InVT);
> @@ -10601,7 +10601,7 @@
>     return SDValue();
> 
>   if (Subtarget->hasInt256())
> -    return DAG.getNode(X86ISD::VSEXT_MOVL, dl, VT, In);
> +    return DAG.getNode(X86ISD::VSEXT, dl, VT, In);
> 
>   // Optimize vectors in AVX mode
>   // Sign extend  v8i16 to v8i32 and
> @@ -10630,8 +10630,8 @@
>   MVT HalfVT = MVT::getVectorVT(VT.getScalarType(),
>                                 VT.getVectorNumElements()/2);
> 
> -  OpLo = DAG.getNode(X86ISD::VSEXT_MOVL, dl, HalfVT, OpLo);
> -  OpHi = DAG.getNode(X86ISD::VSEXT_MOVL, dl, HalfVT, OpHi);
> +  OpLo = DAG.getNode(X86ISD::VSEXT, dl, HalfVT, OpLo);
> +  OpHi = DAG.getNode(X86ISD::VSEXT, dl, HalfVT, OpHi);
> 
>   return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);
> }
> @@ -14001,7 +14001,6 @@
>   case X86ISD::ATOMAND64_DAG:      return "X86ISD::ATOMAND64_DAG";
>   case X86ISD::ATOMNAND64_DAG:     return "X86ISD::ATOMNAND64_DAG";
>   case X86ISD::VZEXT_MOVL:         return "X86ISD::VZEXT_MOVL";
> -  case X86ISD::VSEXT_MOVL:         return "X86ISD::VSEXT_MOVL";
>   case X86ISD::VZEXT_LOAD:         return "X86ISD::VZEXT_LOAD";
>   case X86ISD::VZEXT:              return "X86ISD::VZEXT";
>   case X86ISD::VSEXT:              return "X86ISD::VSEXT";
> Index: lib/Target/X86/X86ISelLowering.h
> ===================================================================
> --- lib/Target/X86/X86ISelLowering.h
> +++ lib/Target/X86/X86ISelLowering.h
> @@ -245,12 +245,9 @@
>       /// the list of operands.
>       TC_RETURN,
> 
> -      // VZEXT_MOVL - Vector move low and zero extend.
> +      // VZEXT_MOVL - Vector move to low scalar and zero higher vector elements.
>       VZEXT_MOVL,
> 
> -      // VSEXT_MOVL - Vector move low and sign extend.
> -      VSEXT_MOVL,
> -
>       // VZEXT - Vector integer zero-extend.
>       VZEXT,
> 
> Index: lib/Target/X86/X86InstrFragmentsSIMD.td
> ===================================================================
> --- lib/Target/X86/X86InstrFragmentsSIMD.td
> +++ lib/Target/X86/X86InstrFragmentsSIMD.td
> @@ -87,16 +87,6 @@
> def X86vzmovl  : SDNode<"X86ISD::VZEXT_MOVL",
>                  SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>;
> 
> -def X86vzmovly  : SDNode<"X86ISD::VZEXT_MOVL",
> -                 SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
> -                                      SDTCisInt<0>, SDTCisInt<1>,
> -                                      SDTCisOpSmallerThanOp<1, 0> ]>>;
> -
> -def X86vsmovl  : SDNode<"X86ISD::VSEXT_MOVL",
> -                 SDTypeProfile<1, 1,
> -                 [SDTCisVec<0>, SDTCisVec<1>, SDTCisInt<0>, SDTCisInt<1>,
> -                  SDTCisOpSmallerThanOp<1, 0>]>>;
> -
> def X86vzload  : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
>                         [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
> 
> Index: lib/Target/X86/X86InstrSSE.td
> ===================================================================
> --- lib/Target/X86/X86InstrSSE.td
> +++ lib/Target/X86/X86InstrSSE.td
> @@ -5824,34 +5824,6 @@
>             (PMOVZXDQrm addr:$src)>;
> }
> 
> -let Predicates = [HasAVX2] in {
> -  let AddedComplexity = 15 in {
> -    def : Pat<(v4i64 (X86vzmovly (v4i32 VR128:$src))),
> -              (VPMOVZXDQYrr VR128:$src)>;
> -    def : Pat<(v8i32 (X86vzmovly (v8i16 VR128:$src))),
> -              (VPMOVZXWDYrr VR128:$src)>;
> -    def : Pat<(v16i16 (X86vzmovly (v16i8 VR128:$src))),
> -              (VPMOVZXBWYrr VR128:$src)>;
> -  }
> -
> -  def : Pat<(v4i64 (X86vsmovl (v4i32 VR128:$src))), (VPMOVSXDQYrr VR128:$src)>;
> -  def : Pat<(v8i32 (X86vsmovl (v8i16 VR128:$src))), (VPMOVSXWDYrr VR128:$src)>;
> -  def : Pat<(v16i16 (X86vsmovl (v16i8 VR128:$src))), (VPMOVSXBWYrr VR128:$src)>;
> -}
> -
> -let Predicates = [HasAVX] in {
> -  def : Pat<(v2i64 (X86vsmovl (v4i32 VR128:$src))), (VPMOVSXDQrr VR128:$src)>;
> -  def : Pat<(v4i32 (X86vsmovl (v8i16 VR128:$src))), (VPMOVSXWDrr VR128:$src)>;
> -  def : Pat<(v8i16 (X86vsmovl (v16i8 VR128:$src))), (VPMOVSXBWrr VR128:$src)>;
> -}
> -
> -let Predicates = [UseSSE41] in {
> -  def : Pat<(v2i64 (X86vsmovl (v4i32 VR128:$src))), (PMOVSXDQrr VR128:$src)>;
> -  def : Pat<(v4i32 (X86vsmovl (v8i16 VR128:$src))), (PMOVSXWDrr VR128:$src)>;
> -  def : Pat<(v8i16 (X86vsmovl (v16i8 VR128:$src))), (PMOVSXBWrr VR128:$src)>;
> -}
> -
> -
> multiclass SS41I_binop_rm_int4<bits<8> opc, string OpcodeStr, Intrinsic IntId,
>                                OpndItins itins = DEFAULT_ITINS> {
>   def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
> @@ -6002,9 +5974,9 @@
>   def : Pat<(v4i64 (X86vsext (v8i32 VR256:$src))),
>             (VPMOVSXDQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
> 
> -  def : Pat<(v8i32 (X86vsmovl (v8i16 (bitconvert (v2i64 (load addr:$src)))))),
> +  def : Pat<(v8i32 (X86vsext (v8i16 (bitconvert (v2i64 (load addr:$src)))))),
>             (VPMOVSXWDYrm addr:$src)>;
> -  def : Pat<(v4i64 (X86vsmovl (v4i32 (bitconvert (v2i64 (load addr:$src)))))),
> +  def : Pat<(v4i64 (X86vsext (v4i32 (bitconvert (v2i64 (load addr:$src)))))),
>             (VPMOVSXDQYrm addr:$src)>;
> 
>   def : Pat<(v8i32 (X86vsext (v16i8 (bitconvert (v2i64
> <D2700.1.patch>_______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits




More information about the llvm-commits mailing list