[llvm-commits] [llvm] r46949 - in /llvm/trunk/lib/Target: TargetSelectionDAG.td X86/README-SSE.txt X86/X86ISelLowering.cpp X86/X86ISelLowering.h X86/X86InstrSSE.td

Evan Cheng evan.cheng at apple.com
Tue Feb 12 00:00:11 PST 2008


On Feb 10, 2008, at 8:19 PM, Nate Begeman wrote:

>
> +
> +  if (Subtarget->hasSSE41()) {
> ...
...
>
> +    if (Subtarget->is64Bit()) {
> +      setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v2i64, Legal);
> +      setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v2f64, Legal);
> +
> +      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal);
> +      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
> +    }
> +  }

I don't see the corresponding patterns? This breaks X86/illegal-insert.ll
on SSE4-capable machines. I am going to change them from 'legal' to
'custom' for now. Please fix if that's not right.

Thanks,

Evan

>
>
>   // We want to custom lower some of our intrinsics.
>   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
> @@ -3655,10 +3682,34 @@
> }
>
> SDOperand
> +X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDOperand Op,
> +                                                SelectionDAG &DAG) {
> +  MVT::ValueType VT = Op.getValueType();
> +  if (MVT::getSizeInBits(VT) == 8) {
> +    SDOperand Extract = DAG.getNode(X86ISD::PEXTRB, MVT::i32,
> +                                    Op.getOperand(0),  
> Op.getOperand(1));
> +    SDOperand Assert  = DAG.getNode(ISD::AssertZext, MVT::i32,  
> Extract,
> +                                    DAG.getValueType(VT));
> +    return DAG.getNode(ISD::TRUNCATE, VT, Assert);
> +  } else if (MVT::getSizeInBits(VT) == 16) {
> +    SDOperand Extract = DAG.getNode(X86ISD::PEXTRW, MVT::i32,
> +                                    Op.getOperand(0),  
> Op.getOperand(1));
> +    SDOperand Assert  = DAG.getNode(ISD::AssertZext, MVT::i32,  
> Extract,
> +                                    DAG.getValueType(VT));
> +    return DAG.getNode(ISD::TRUNCATE, VT, Assert);
> +  }
> +  return SDOperand();
> +}
> +
> +
> +SDOperand
> X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDOperand Op,  
> SelectionDAG &DAG) {
>   if (!isa<ConstantSDNode>(Op.getOperand(1)))
>     return SDOperand();
>
> +  if (Subtarget->hasSSE41())
> +    return LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG);
> +
>   MVT::ValueType VT = Op.getValueType();
>   // TODO: handle v16i8.
>   if (MVT::getSizeInBits(VT) == 16) {
> @@ -3699,6 +3750,9 @@
>     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
>                        DAG.getIntPtrConstant(0));
>   } else if (MVT::getSizeInBits(VT) == 64) {
> +    // FIXME: .td only matches this for <2 x f64>, not <2 x i64> on  
> 32b
> +    // FIXME: seems like this should be unnecessary if mov{h,l}pd  
> were taught
> +    //        to match extract_elt for f64.
>     unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
>     if (Idx == 0)
>       return Op;
> @@ -3724,9 +3778,47 @@
> }
>
> SDOperand
> +X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDOperand Op,  
> SelectionDAG &DAG){
> +  MVT::ValueType VT = Op.getValueType();
> +  MVT::ValueType EVT = MVT::getVectorElementType(VT);
> +
> +  SDOperand N0 = Op.getOperand(0);
> +  SDOperand N1 = Op.getOperand(1);
> +  SDOperand N2 = Op.getOperand(2);
> +
> +  if ((MVT::getSizeInBits(EVT) == 8) || (MVT::getSizeInBits(EVT) ==  
> 16)) {
> +    unsigned Opc = (MVT::getSizeInBits(EVT) == 8) ? X86ISD::PINSRB
> +                                                  : X86ISD::PINSRW;
> +    // Transform it so it match pinsr{b,w} which expects a GR32 as  
> its second
> +    // argument.
> +    if (N1.getValueType() != MVT::i32)
> +      N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1);
> +    if (N2.getValueType() != MVT::i32)
> +      N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)- 
> >getValue());
> +    return DAG.getNode(Opc, VT, N0, N1, N2);
> +  } else if (EVT == MVT::f32) {
> +    // Bits [7:6] of the constant are the source select.  This will  
> always be
> +    //  zero here.  The DAG Combiner may combine an extract_elt  
> index into these
> +    //  bits.  For example (insert (extract, 3), 2) could be  
> matched by putting
> +    //  the '3' into bits [7:6] of X86ISD::INSERTPS.
> +    // Bits [5:4] of the constant are the destination select.  This  
> is the
> +    //  value of the incoming immediate.
> +    // Bits [3:0] of the constant are the zero mask.  The DAG  
> Combiner may
> +    //   combine either bitwise AND or insert of float 0.0 to set  
> these bits.
> +    N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getValue()  
> << 4);
> +    return DAG.getNode(X86ISD::INSERTPS, VT, N0, N1, N2);
> +  }
> +  return SDOperand();
> +}
> +
> +SDOperand
> X86TargetLowering::LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG  
> &DAG) {
>   MVT::ValueType VT = Op.getValueType();
>   MVT::ValueType EVT = MVT::getVectorElementType(VT);
> +
> +  if (Subtarget->hasSSE41())
> +    return LowerINSERT_VECTOR_ELT_SSE4(Op, DAG);
> +
>   if (EVT == MVT::i8)
>     return SDOperand();
>
> @@ -5273,7 +5365,10 @@
>   case X86ISD::GlobalBaseReg:      return "X86ISD::GlobalBaseReg";
>   case X86ISD::Wrapper:            return "X86ISD::Wrapper";
>   case X86ISD::S2VEC:              return "X86ISD::S2VEC";
> +  case X86ISD::PEXTRB:             return "X86ISD::PEXTRB";
>   case X86ISD::PEXTRW:             return "X86ISD::PEXTRW";
> +  case X86ISD::INSERTPS:           return "X86ISD::INSERTPS";
> +  case X86ISD::PINSRB:             return "X86ISD::PINSRB";
>   case X86ISD::PINSRW:             return "X86ISD::PINSRW";
>   case X86ISD::FMAX:               return "X86ISD::FMAX";
>   case X86ISD::FMIN:               return "X86ISD::FMIN";
>
> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=46949&r1=46948&r2=46949&view=diff
>
> = 
> = 
> = 
> = 
> = 
> = 
> = 
> = 
> ======================================================================
> --- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Sun Feb 10 22:19:36  
> 2008
> @@ -170,10 +170,22 @@
>       /// have to match the operand type.
>       S2VEC,
>
> +      /// PEXTRB - Extract an 8-bit value from a vector and zero  
> extend it to
> +      /// i32, corresponds to X86::PEXTRB.
> +      PEXTRB,
> +
>       /// PEXTRW - Extract a 16-bit value from a vector and zero  
> extend it to
>       /// i32, corresponds to X86::PEXTRW.
>       PEXTRW,
>
> +      /// INSERTPS - Insert any element of a 4 x float vector into  
> any element
> +      /// of a destination 4 x floatvector.
> +      INSERTPS,
> +
> +      /// PINSRB - Insert the lower 8-bits of a 32-bit value to a  
> vector,
> +      /// corresponds to X86::PINSRB.
> +      PINSRB,
> +
>       /// PINSRW - Insert the lower 16-bits of a 32-bit value to a  
> vector,
>       /// corresponds to X86::PINSRW.
>       PINSRW,
> @@ -493,7 +505,9 @@
>     SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG);
>     SDOperand LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG);
>     SDOperand LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG  
> &DAG);
> +    SDOperand LowerEXTRACT_VECTOR_ELT_SSE4(SDOperand Op,  
> SelectionDAG &DAG);
>     SDOperand LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG);
> +    SDOperand LowerINSERT_VECTOR_ELT_SSE4(SDOperand Op,  
> SelectionDAG &DAG);
>     SDOperand LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG);
>     SDOperand LowerConstantPool(SDOperand Op, SelectionDAG &DAG);
>     SDOperand LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG);
>
> Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=46949&r1=46948&r2=46949&view=diff
>
> = 
> = 
> = 
> = 
> = 
> = 
> = 
> = 
> ======================================================================
> --- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
> +++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Sun Feb 10 22:19:36 2008
> @@ -35,8 +35,19 @@
> def X86comi    : SDNode<"X86ISD::COMI",      SDTX86CmpTest>;
> def X86ucomi   : SDNode<"X86ISD::UCOMI",     SDTX86CmpTest>;
> def X86s2vec   : SDNode<"X86ISD::S2VEC",  SDTypeProfile<1, 1, []>,  
> []>;
> -def X86pextrw  : SDNode<"X86ISD::PEXTRW", SDTypeProfile<1, 2, []>,  
> []>;
> -def X86pinsrw  : SDNode<"X86ISD::PINSRW", SDTypeProfile<1, 3, []>,  
> []>;
> +def X86pextrb  : SDNode<"X86ISD::PEXTRB",
> +                 SDTypeProfile<1, 2, [SDTCisVT<0, i32>,  
> SDTCisPtrTy<2>]>>;
> +def X86pextrw  : SDNode<"X86ISD::PEXTRW",
> +                 SDTypeProfile<1, 2, [SDTCisVT<0, i32>,  
> SDTCisPtrTy<2>]>>;
> +def X86pinsrb  : SDNode<"X86ISD::PINSRB",
> +                 SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>,  
> SDTCisSameAs<0,1>,
> +                                      SDTCisVT<2, i32>,  
> SDTCisPtrTy<3>]>>;
> +def X86pinsrw  : SDNode<"X86ISD::PINSRW",
> +                 SDTypeProfile<1, 3, [SDTCisVT<0, v8i16>,  
> SDTCisSameAs<0,1>,
> +                                      SDTCisVT<2, i32>,  
> SDTCisPtrTy<3>]>>;
> +def X86insrtps : SDNode<"X86ISD::INSERTPS",
> +                 SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>,  
> SDTCisSameAs<0,1>,
> +                                      SDTCisVT<2, f32>,  
> SDTCisPtrTy<3>]>>;
>
> // 
> = 
> = 
> = 
> ----------------------------------------------------------------------= 
> ==//
> // SSE 'Special' Instructions
> @@ -2087,23 +2098,21 @@
>                     (outs GR32:$dst), (ins VR128:$src1, i32i8imm: 
> $src2),
>                     "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
>                     [(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1),
> -                                     (iPTR imm:$src2)))]>;
> +                                                imm:$src2))]>;
> let isTwoAddress = 1 in {
>   def PINSRWrri : PDIi8<0xC4, MRMSrcReg,
>                        (outs VR128:$dst), (ins VR128:$src1,
>                         GR32:$src2, i32i8imm:$src3),
>                        "pinsrw\t{$src3, $src2, $dst|$dst, $src2,  
> $src3}",
>                        [(set VR128:$dst,
> -                         (v8i16 (X86pinsrw (v8i16 VR128:$src1),
> -                                 GR32:$src2, (iPTR imm:$src3))))]>;
> +                         (X86pinsrw VR128:$src1, GR32:$src2, imm: 
> $src3))]>;
>   def PINSRWrmi : PDIi8<0xC4, MRMSrcMem,
>                        (outs VR128:$dst), (ins VR128:$src1,
>                         i16mem:$src2, i32i8imm:$src3),
>                        "pinsrw\t{$src3, $src2, $dst|$dst, $src2,  
> $src3}",
> -                       [(set VR128:$dst,
> -                         (v8i16 (X86pinsrw (v8i16 VR128:$src1),
> -                                 (i32 (anyext (loadi16 addr:$src2))),
> -                                 (iPTR imm:$src3))))]>;
> +                       [(set VR128:$dst,
> +                         (X86pinsrw VR128:$src1, (extloadi16 addr: 
> $src2),
> +                                    imm:$src3))]>;
> }
>
> // Mask creation
> @@ -3255,7 +3264,7 @@
>
>
> /// SS41I_binop_rmi_int - SSE 4.1 binary operator with immediate
> -let isTwoAddress = 1 in {
> +let Uses = [XMM0], isTwoAddress = 1 in {
>   multiclass SS41I_ternary_int<bits<8> opc, string OpcodeStr,  
> Intrinsic IntId> {
>     def rr0 : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
>                     (ins VR128:$src1, VR128:$src2),
> @@ -3328,26 +3337,44 @@
> defm PMOVZXBQ   : SS41I_binop_rm_int2<0x32, "pmovsxbq",  
> int_x86_sse41_pmovzxbq>;
>
>
> -/// SS41I_binop_ext8 - SSE 4.1 binary operator with immediate
> -multiclass SS41I_binop_ext8<bits<8> opc, string OpcodeStr> {
> +/// SS41I_binop_ext8 - SSE 4.1 extract 8 bits to 32 bit reg or 8  
> bit mem
> +multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> {
>   def rr : SS4AI<opc, MRMSrcReg, (outs GR32:$dst),
>                  (ins VR128:$src1, i32i8imm:$src2),
>                  !strconcat(OpcodeStr,
>                   "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
> -                 [(set GR32:$dst, (zext
> -                  (extractelt (v16i8 VR128:$src1), imm:$src2)))]>,  
> OpSize;
> +                 [(set GR32:$dst, (X86pextrb (v16i8 VR128:$src1),  
> imm:$src2))]>,
> +                 OpSize;
>   def mr : SS4AI<opc, MRMDestMem, (outs),
>                  (ins i8mem:$dst, VR128:$src1, i32i8imm:$src2),
>                  !strconcat(OpcodeStr,
>                   "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
> -                 [(store (extractelt (v16i8 VR128:$src1), imm:$src2),
> -                          addr:$dst)]>, OpSize;
> +                 []>, OpSize;
> +// FIXME:
> +// There's an AssertZext in the way of writing the store pattern
> +// (store (i8 (trunc (X86pextrb (v16i8 VR128:$src1), imm:$src2))),  
> addr:$dst)
> +}
> +
> +defm PEXTRB      : SS41I_extract8<0x14, "pextrb">;
> +
> +
> +/// SS41I_extract16 - SSE 4.1 extract 16 bits to memory destination
> +multiclass SS41I_extract16<bits<8> opc, string OpcodeStr> {
> +  def mr : SS4AI<opc, MRMDestMem, (outs),
> +                 (ins i16mem:$dst, VR128:$src1, i32i8imm:$src2),
> +                 !strconcat(OpcodeStr,
> +                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
> +                 []>, OpSize;
> +// FIXME:
> +// There's an AssertZext in the way of writing the store pattern
> +// (store (i16 (trunc (X86pextrw (v16i8 VR128:$src1), imm:$src2))),  
> addr:$dst)
> }
>
> -defm PEXTRB      : SS41I_binop_ext8<0x14, "pextrb">;
> +defm PEXTRW      : SS41I_extract16<0x15, "pextrw">;
> +
>
> -/// SS41I_binop_ext32 - SSE 4.1 binary operator with immediate
> -multiclass SS41I_binop_ext32<bits<8> opc, string OpcodeStr> {
> +/// SS41I_extract32 - SSE 4.1 extract 32 bits to int reg or memory  
> destination
> +multiclass SS41I_extract32<bits<8> opc, string OpcodeStr> {
>   def rr : SS4AI<opc, MRMSrcReg, (outs GR32:$dst),
>                  (ins VR128:$src1, i32i8imm:$src2),
>                  !strconcat(OpcodeStr,
> @@ -3362,10 +3389,11 @@
>                           addr:$dst)]>, OpSize;
> }
>
> -defm PEXTRD      : SS41I_binop_ext32<0x16, "pextrd">;
> +defm PEXTRD      : SS41I_extract32<0x16, "pextrd">;
>
> -/// SS41I_binop_extf32 - SSE 4.1 binary operator with immediate
> -multiclass SS41I_binop_extf32<bits<8> opc, string OpcodeStr> {
> +
> +/// SS41I_extractf32 - SSE 4.1 extract 32 bits to fp reg or memory  
> destination
> +multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr> {
>   def rr : SS4AI<opc, MRMSrcReg, (outs FR32:$dst),
>                  (ins VR128:$src1, i32i8imm:$src2),
>                  !strconcat(OpcodeStr,
> @@ -3380,5 +3408,65 @@
>                           addr:$dst)]>, OpSize;
> }
>
> -defm EXTRACTPS   : SS41I_binop_extf32<0x17, "extractps">;
> +defm EXTRACTPS   : SS41I_extractf32<0x17, "extractps">;
> +
> +let isTwoAddress = 1 in {
> +  multiclass SS41I_insert8<bits<8> opc, string OpcodeStr> {
> +    def rr : SS4AI<opc, MRMSrcReg, (outs VR128:$dst),
> +                   (ins VR128:$src1, GR32:$src2, i32i8imm:$src3),
> +                   !strconcat(OpcodeStr,
> +                    "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
> +                   [(set VR128:$dst,
> +                     (X86pinsrb VR128:$src1, GR32:$src2, imm: 
> $src3))]>, OpSize;
> +    def rm : SS4AI<opc, MRMSrcMem, (outs VR128:$dst),
> +                   (ins VR128:$src1, i8mem:$src2, i32i8imm:$src3),
> +                   !strconcat(OpcodeStr,
> +                    "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
> +                   [(set VR128:$dst,
> +                     (X86pinsrb VR128:$src1, (extloadi8 addr:$src2),
> +                                imm:$src3))]>, OpSize;
> +  }
> +}
> +
> +defm PINSRB      : SS41I_insert8<0x20, "pinsrb">;
> +
> +let isTwoAddress = 1 in {
> +  multiclass SS41I_insert32<bits<8> opc, string OpcodeStr> {
> +    def rr : SS4AI<opc, MRMSrcReg, (outs VR128:$dst),
> +                   (ins VR128:$src1, GR32:$src2, i32i8imm:$src3),
> +                   !strconcat(OpcodeStr,
> +                    "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
> +                   [(set VR128:$dst,
> +                     (v4i32 (insertelt VR128:$src1, GR32:$src2, imm: 
> $src3)))]>,
> +                   OpSize;
> +    def rm : SS4AI<opc, MRMSrcMem, (outs VR128:$dst),
> +                   (ins VR128:$src1, i32mem:$src2, i32i8imm:$src3),
> +                   !strconcat(OpcodeStr,
> +                    "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
> +                   [(set VR128:$dst,
> +                     (v4i32 (insertelt VR128:$src1, (loadi32 addr: 
> $src2),
> +                                       imm:$src3)))]>, OpSize;
> +  }
> +}
> +
> +defm PINSRD      : SS41I_insert32<0x22, "pinsrd">;
> +
> +let isTwoAddress = 1 in {
> +  multiclass SS41I_insertf32<bits<8> opc, string OpcodeStr> {
> +    def rr : SS4AI<opc, MRMSrcReg, (outs VR128:$dst),
> +                   (ins VR128:$src1, FR32:$src2, i32i8imm:$src3),
> +                   !strconcat(OpcodeStr,
> +                    "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
> +                   [(set VR128:$dst,
> +                     (X86insrtps VR128:$src1, FR32:$src2, imm: 
> $src3))]>, OpSize;
> +    def rm : SS4AI<opc, MRMSrcMem, (outs VR128:$dst),
> +                   (ins VR128:$src1, f32mem:$src2, i32i8imm:$src3),
> +                   !strconcat(OpcodeStr,
> +                    "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
> +                   [(set VR128:$dst,
> +                     (X86insrtps VR128:$src1, (loadf32 addr:$src2),
> +                                 imm:$src3))]>, OpSize;
> +  }
> +}
>
> +defm INSERTPS    : SS41I_insertf32<0x31, "insertps">;
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits




More information about the llvm-commits mailing list