[llvm-commits] [llvm] r48746 - in /llvm/trunk: lib/Target/X86/X86ISelLowering.cpp lib/Target/X86/X86InstrSSE.td test/CodeGen/X86/vec_extract-sse4.ll

Chris Lattner clattner at apple.com
Mon Mar 24 14:54:18 PDT 2008


On Mar 24, 2008, at 2:52 PM, Evan Cheng wrote:

> Author: evancheng
> Date: Mon Mar 24 16:52:23 2008
> New Revision: 48746
>
> URL: http://llvm.org/viewvc/llvm-project?rev=48746&view=rev
> Log:
> - SSE4.1 extractps extracts an f32 into a GR32 register. Very
> useful! Not. Fix the instruction specification and teach the lowering
> code to use it only when the only use is a store instruction.

Heh, it might also be useful for bitcast(extractelement, i32), etc.,
which could be useful for things like copysign.

-Chris

>
>
> Added:
>    llvm/trunk/test/CodeGen/X86/vec_extract-sse4.ll
> Modified:
>    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
>    llvm/trunk/lib/Target/X86/X86InstrSSE.td
>
> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=48746&r1=48745&r2=48746&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Mon Mar 24 16:52:23 2008
> @@ -699,7 +699,7 @@
>     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Custom);
>     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Custom);
>     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal);
> -    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal);
> +    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
>
>     if (Subtarget->is64Bit()) {
>       setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v2i64, Legal);
> @@ -3718,6 +3718,19 @@
>     SDOperand Assert  = DAG.getNode(ISD::AssertZext, MVT::i32, Extract,
>                                     DAG.getValueType(VT));
>     return DAG.getNode(ISD::TRUNCATE, VT, Assert);
> +  } else if (VT == MVT::f32) {
> +    // EXTRACTPS outputs to a GPR32 register which will require a movd to copy
> +    // the result back to FR32 register. It's only worth matching if the
> +    // result has a single use which is a store.
> +    if (!Op.hasOneUse())
> +      return SDOperand();
> +    SDNode *User = *Op.Val->use_begin();
> +    if (User->getOpcode() != ISD::STORE)
> +      return SDOperand();
> +    SDOperand Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32,
> +                    DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, Op.getOperand(0)),
> +                                    Op.getOperand(1));
> +    return DAG.getNode(ISD::BIT_CONVERT, MVT::f32, Extract);
>   }
>   return SDOperand();
> }
> @@ -3728,8 +3741,11 @@
>   if (!isa<ConstantSDNode>(Op.getOperand(1)))
>     return SDOperand();
>
> -  if (Subtarget->hasSSE41())
> -    return LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG);
> +  if (Subtarget->hasSSE41()) {
> +    SDOperand Res = LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG);
> +    if (Res.Val)
> +      return Res;
> +  }
>
>   MVT::ValueType VT = Op.getValueType();
>   // TODO: handle v16i8.
>
> Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=48746&r1=48745&r2=48746&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
> +++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Mon Mar 24 16:52:23 2008
> @@ -3380,19 +3380,22 @@
> defm PEXTRD      : SS41I_extract32<0x16, "pextrd">;
>
>
> -/// SS41I_extractf32 - SSE 4.1 extract 32 bits to fp reg or memory destination
> +/// SS41I_extractf32 - SSE 4.1 extract 32 bits fp value to int reg or memory
> +/// destination
> multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr> {
> -  def rr : SS4AIi8<opc, MRMSrcReg, (outs FR32:$dst),
> +  // Not worth matching to rr form of extractps since the result is in GPR32.
> +  def rr : SS4AIi8<opc, MRMSrcReg, (outs GR32:$dst),
>                  (ins VR128:$src1, i32i8imm:$src2),
>                  !strconcat(OpcodeStr,
>                   "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
> -                 [(set FR32:$dst,
> -                  (extractelt (v4f32 VR128:$src1), imm:$src2))]>, OpSize;
> +                 [/*(set GR32:$dst,
> +                  (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2))*/]>,
> +           OpSize;
>   def mr : SS4AIi8<opc, MRMDestMem, (outs),
>                  (ins f32mem:$dst, VR128:$src1, i32i8imm:$src2),
>                  !strconcat(OpcodeStr,
>                   "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
> -                 [(store (extractelt (v4f32 VR128:$src1), imm:$src2),
> +                 [(store (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2),
>                           addr:$dst)]>, OpSize;
> }
>
>
> Added: llvm/trunk/test/CodeGen/X86/vec_extract-sse4.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_extract-sse4.ll?rev=48746&view=auto
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/vec_extract-sse4.ll (added)
> +++ llvm/trunk/test/CodeGen/X86/vec_extract-sse4.ll Mon Mar 24 16:52:23 2008
> @@ -0,0 +1,30 @@
> +; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse41 -o %t -f
> +; RUN: grep extractps %t | count 1
> +; RUN: grep pextrd    %t | count 2
> +; RUN: grep pshufd    %t | count 1
> +
> +define void @t1(float* %R, <4 x float>* %P1) {
> +	%X = load <4 x float>* %P1
> +	%tmp = extractelement <4 x float> %X, i32 3
> +	store float %tmp, float* %R
> +	ret void
> +}
> +
> +define float @t2(<4 x float>* %P1) {
> +	%X = load <4 x float>* %P1
> +	%tmp = extractelement <4 x float> %X, i32 2
> +	ret float %tmp
> +}
> +
> +define void @t3(i32* %R, <4 x i32>* %P1) {
> +	%X = load <4 x i32>* %P1
> +	%tmp = extractelement <4 x i32> %X, i32 3
> +	store i32 %tmp, i32* %R
> +	ret void
> +}
> +
> +define i32 @t4(<4 x i32>* %P1) {
> +	%X = load <4 x i32>* %P1
> +	%tmp = extractelement <4 x i32> %X, i32 3
> +	ret i32 %tmp
> +}
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits




More information about the llvm-commits mailing list