[llvm-commits] [llvm] r98889 - in /llvm/trunk/lib/Target/ARM: ARMISelLowering.cpp ARMISelLowering.h ARMInstrFormats.td ARMInstrInfo.td ARMInstrNEON.td ARMInstrVFP.td
Bob Wilson
bob.wilson at apple.com
Fri Mar 19 15:13:05 PDT 2010
Anton,
This looks like a nice cleanup, but it is causing performance regressions. Since I don't get the impression from your commit message that this is critical to your work, I'm going to revert it for now. If I can find a small testcase, I will certainly send it to you. Otherwise, I may see if I can figure out what's going wrong. Basically, I'm seeing lots of redundant back-to-back moves (out of an FP register and straight back into it) like:
vmov r2, s2
vmov s2, r2
On Mar 18, 2010, at 3:35 PM, Anton Korobeynikov wrote:
> Author: asl
> Date: Thu Mar 18 17:35:45 2010
> New Revision: 98889
>
> URL: http://llvm.org/viewvc/llvm-project?rev=98889&view=rev
> Log:
> Get rid of target-specific fp <-> int nodes while I'm still here.
>
> Modified:
> llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
> llvm/trunk/lib/Target/ARM/ARMISelLowering.h
> llvm/trunk/lib/Target/ARM/ARMInstrFormats.td
> llvm/trunk/lib/Target/ARM/ARMInstrInfo.td
> llvm/trunk/lib/Target/ARM/ARMInstrNEON.td
> llvm/trunk/lib/Target/ARM/ARMInstrVFP.td
>
> Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=98889&r1=98888&r2=98889&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Thu Mar 18 17:35:45 2010
> @@ -428,13 +428,6 @@
>
> // Various VFP goodness
> if (!UseSoftFloat && !Subtarget->isThumb1Only()) {
> - // int <-> fp are custom expanded into bit_convert + ARMISD ops.
> - if (Subtarget->hasVFP2()) {
> - setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
> - setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
> - setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
> - setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
> - }
> // Special handling for half-precision FP.
> if (!Subtarget->hasFP16()) {
> setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Expand);
> @@ -495,11 +488,6 @@
>
> case ARMISD::RBIT: return "ARMISD::RBIT";
>
> - case ARMISD::FTOSI: return "ARMISD::FTOSI";
> - case ARMISD::FTOUI: return "ARMISD::FTOUI";
> - case ARMISD::SITOF: return "ARMISD::SITOF";
> - case ARMISD::UITOF: return "ARMISD::UITOF";
> -
> case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG";
> case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG";
> case ARMISD::RRX: return "ARMISD::RRX";
> @@ -1978,44 +1966,6 @@
> }
> }
>
> -static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
> - DebugLoc dl = Op.getDebugLoc();
> - unsigned Opc;
> -
> - switch (Op.getOpcode()) {
> - default:
> - assert(0 && "Invalid opcode!");
> - case ISD::FP_TO_SINT:
> - Opc = ARMISD::FTOSI;
> - break;
> - case ISD::FP_TO_UINT:
> - Opc = ARMISD::FTOUI;
> - break;
> - }
> - Op = DAG.getNode(Opc, dl, MVT::f32, Op.getOperand(0));
> - return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
> -}
> -
> -static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
> - EVT VT = Op.getValueType();
> - DebugLoc dl = Op.getDebugLoc();
> - unsigned Opc;
> -
> - switch (Op.getOpcode()) {
> - default:
> - assert(0 && "Invalid opcode!");
> - case ISD::SINT_TO_FP:
> - Opc = ARMISD::SITOF;
> - break;
> - case ISD::UINT_TO_FP:
> - Opc = ARMISD::UITOF;
> - break;
> - }
> -
> - Op = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, Op.getOperand(0));
> - return DAG.getNode(Opc, dl, VT, Op);
> -}
> -
> static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
> // Implement fcopysign with a fabs and a conditional fneg.
> SDValue Tmp0 = Op.getOperand(0);
> @@ -3070,10 +3020,6 @@
> case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
> case ISD::VASTART: return LowerVASTART(Op, DAG, VarArgsFrameIndex);
> case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG, Subtarget);
> - case ISD::SINT_TO_FP:
> - case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
> - case ISD::FP_TO_SINT:
> - case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG);
> case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
> case ISD::RETURNADDR: break;
> case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
>
> Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.h?rev=98889&r1=98888&r2=98889&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/ARM/ARMISelLowering.h (original)
> +++ llvm/trunk/lib/Target/ARM/ARMISelLowering.h Thu Mar 18 17:35:45 2010
> @@ -55,11 +55,6 @@
>
> RBIT, // ARM bitreverse instruction
>
> - FTOSI, // FP to sint within a FP register.
> - FTOUI, // FP to uint within a FP register.
> - SITOF, // sint to FP within a FP register.
> - UITOF, // uint to FP within a FP register.
> -
> SRL_FLAG, // V,Flag = srl_flag X -> srl X, 1 + save carry out.
> SRA_FLAG, // V,Flag = sra_flag X -> sra X, 1 + save carry out.
> RRX, // V = RRX X, Flag -> srl X, 1 + shift in carry flag.
>
> Modified: llvm/trunk/lib/Target/ARM/ARMInstrFormats.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrFormats.td?rev=98889&r1=98888&r2=98889&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/ARM/ARMInstrFormats.td (original)
> +++ llvm/trunk/lib/Target/ARM/ARMInstrFormats.td Thu Mar 18 17:35:45 2010
> @@ -937,6 +937,9 @@
> class ARMV6Pat<dag pattern, dag result> : Pat<pattern, result> {
> list<Predicate> Predicates = [IsARM, HasV6];
> }
> +class VFPPat<dag pattern, dag result> : Pat<pattern, result> {
> + list<Predicate> Predicates = [HasVFP2];
> +}
>
> //===----------------------------------------------------------------------===//
> //
>
> Modified: llvm/trunk/lib/Target/ARM/ARMInstrInfo.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrInfo.td?rev=98889&r1=98888&r2=98889&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/ARM/ARMInstrInfo.td (original)
> +++ llvm/trunk/lib/Target/ARM/ARMInstrInfo.td Thu Mar 18 17:35:45 2010
> @@ -124,6 +124,7 @@
> def HasVFP2 : Predicate<"Subtarget->hasVFP2()">;
> def HasVFP3 : Predicate<"Subtarget->hasVFP3()">;
> def HasNEON : Predicate<"Subtarget->hasNEON()">;
> +def HasFP16 : Predicate<"Subtarget->hasFP16()">;
> def UseNEONForFP : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">;
> def DontUseNEONForFP : Predicate<"!Subtarget->useNEONForSinglePrecisionFP()">;
> def IsThumb : Predicate<"Subtarget->isThumb()">;
>
> Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrNEON.td?rev=98889&r1=98888&r2=98889&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/ARM/ARMInstrNEON.td (original)
> +++ llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Thu Mar 18 17:35:45 2010
> @@ -3139,25 +3139,44 @@
> def : N3VSPat<NEONfmin, VMINfd_sfp>;
>
> // Vector Convert between single-precision FP and integer
> +
> +class NVCVTFIPat<SDNode OpNode, NeonI Inst>
> + : NEONFPPat<(i32 (OpNode SPR:$a)),
> + (i32 (EXTRACT_SUBREG
> + (v2i32 (Inst
> + (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
> + SPR:$a,
> + arm_ssubreg_0))),
> + arm_ssubreg_0))>;
> +
> +class NVCVTIFPat<SDNode OpNode, NeonI Inst>
> + : NEONFPPat<(f32 (OpNode GPR:$a)),
> + (f32 (EXTRACT_SUBREG
> + (v2f32 (Inst
> + (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
> + (i32 (COPY_TO_REGCLASS GPR:$a, SPR)),
> + arm_ssubreg_0))),
> + arm_ssubreg_0))>;
> +
> let neverHasSideEffects = 1 in
> def VCVTf2sd_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
> v2i32, v2f32, fp_to_sint>;
> -def : N2VSPat<arm_ftosi, f32, v2f32, VCVTf2sd_sfp>;
> +def : NVCVTFIPat<fp_to_sint, VCVTf2sd_sfp>;
>
> let neverHasSideEffects = 1 in
> def VCVTf2ud_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
> v2i32, v2f32, fp_to_uint>;
> -def : N2VSPat<arm_ftoui, f32, v2f32, VCVTf2ud_sfp>;
> +def : NVCVTFIPat<fp_to_uint, VCVTf2ud_sfp>;
>
> let neverHasSideEffects = 1 in
> def VCVTs2fd_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
> v2f32, v2i32, sint_to_fp>;
> -def : N2VSPat<arm_sitof, f32, v2i32, VCVTs2fd_sfp>;
> +def : NVCVTIFPat<sint_to_fp, VCVTs2fd_sfp>;
>
> let neverHasSideEffects = 1 in
> def VCVTu2fd_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
> v2f32, v2i32, uint_to_fp>;
> -def : N2VSPat<arm_uitof, f32, v2i32, VCVTu2fd_sfp>;
> +def : NVCVTIFPat<uint_to_fp, VCVTu2fd_sfp>;
>
> //===----------------------------------------------------------------------===//
> // Non-Instruction Patterns
>
> Modified: llvm/trunk/lib/Target/ARM/ARMInstrVFP.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrVFP.td?rev=98889&r1=98888&r2=98889&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/ARM/ARMInstrVFP.td (original)
> +++ llvm/trunk/lib/Target/ARM/ARMInstrVFP.td Thu Mar 18 17:35:45 2010
> @@ -21,10 +21,6 @@
> SDTypeProfile<1, 2, [SDTCisVT<0, f64>, SDTCisVT<1, i32>,
> SDTCisSameAs<1, 2>]>;
>
> -def arm_ftoui : SDNode<"ARMISD::FTOUI", SDT_FTOI>;
> -def arm_ftosi : SDNode<"ARMISD::FTOSI", SDT_FTOI>;
> -def arm_sitof : SDNode<"ARMISD::SITOF", SDT_ITOF>;
> -def arm_uitof : SDNode<"ARMISD::UITOF", SDT_ITOF>;
> def arm_fmstat : SDNode<"ARMISD::FMSTAT", SDTNone, [SDNPInFlag,SDNPOutFlag]>;
> def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMCmp, [SDNPOutFlag]>;
> def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0",SDT_CMPFP0, [SDNPOutFlag]>;
> @@ -263,15 +259,17 @@
> /* FIXME */ IIC_fpCVTDS, "vcvtb", ".f32.f16\t$dst, $a",
> [/* For disassembly only; pattern left blank */]>;
>
> -def : ARMPat<(f32_to_f16 SPR:$a),
> - (i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>;
> +def : VFPPat<(f32_to_f16 SPR:$a),
> + (i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>,
> + Requires<[HasVFP3, HasFP16]>;
>
> def VCVTBHS : ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$dst), (ins SPR:$a),
> /* FIXME */ IIC_fpCVTDS, "vcvtb", ".f16.f32\t$dst, $a",
> [/* For disassembly only; pattern left blank */]>;
>
> -def : ARMPat<(f16_to_f32 GPR:$a),
> - (VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>;
> +def : VFPPat<(f16_to_f32 GPR:$a),
> + (VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>,
> + Requires<[HasVFP3, HasFP16]>;
>
> def VCVTTSH : ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$dst), (ins SPR:$a),
> /* FIXME */ IIC_fpCVTDS, "vcvtt", ".f32.f16\t$dst, $a",
> @@ -363,62 +361,90 @@
> def VSITOD : AVConv1I<0b11101, 0b11, 0b1000, 0b1011,
> (outs DPR:$dst), (ins SPR:$a),
> IIC_fpCVTID, "vcvt", ".f64.s32\t$dst, $a",
> - [(set DPR:$dst, (f64 (arm_sitof SPR:$a)))]> {
> + [/* For disassembly only; pattern left blank */]> {
> let Inst{7} = 1; // s32
> }
>
> +def : VFPPat<(f64 (sint_to_fp GPR:$a)),
> + (VSITOD (COPY_TO_REGCLASS GPR:$a, SPR))>;
> +
> def VSITOS : AVConv1In<0b11101, 0b11, 0b1000, 0b1010,
> (outs SPR:$dst),(ins SPR:$a),
> IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a",
> - [(set SPR:$dst, (arm_sitof SPR:$a))]> {
> + [/* For disassembly only; pattern left blank */]> {
> let Inst{7} = 1; // s32
> }
>
> +def : VFPPat<(f32 (sint_to_fp GPR:$a)),
> + (VSITOS (COPY_TO_REGCLASS GPR:$a, SPR))>,
> + Requires<[DontUseNEONForFP, HasVFP2]>;
> +
> def VUITOD : AVConv1I<0b11101, 0b11, 0b1000, 0b1011,
> (outs DPR:$dst), (ins SPR:$a),
> IIC_fpCVTID, "vcvt", ".f64.u32\t$dst, $a",
> - [(set DPR:$dst, (f64 (arm_uitof SPR:$a)))]> {
> + [/* For disassembly only; pattern left blank */]> {
> let Inst{7} = 0; // u32
> }
>
> +def : VFPPat<(f64 (uint_to_fp GPR:$a)),
> + (VUITOD (COPY_TO_REGCLASS GPR:$a, SPR))>;
> +
> def VUITOS : AVConv1In<0b11101, 0b11, 0b1000, 0b1010,
> (outs SPR:$dst), (ins SPR:$a),
> IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a",
> - [(set SPR:$dst, (arm_uitof SPR:$a))]> {
> + [/* For disassembly only; pattern left blank */]> {
> let Inst{7} = 0; // u32
> }
>
> +def : VFPPat<(f32 (uint_to_fp GPR:$a)),
> + (VUITOS (COPY_TO_REGCLASS GPR:$a, SPR))>,
> + Requires<[DontUseNEONForFP, HasVFP2]>;
> +
> // FP to Int:
> // Always set Z bit in the instruction, i.e. "round towards zero" variants.
>
> def VTOSIZD : AVConv1I<0b11101, 0b11, 0b1101, 0b1011,
> (outs SPR:$dst), (ins DPR:$a),
> IIC_fpCVTDI, "vcvt", ".s32.f64\t$dst, $a",
> - [(set SPR:$dst, (arm_ftosi (f64 DPR:$a)))]> {
> + [/* For disassembly only; pattern left blank */]> {
> let Inst{7} = 1; // Z bit
> }
>
> +def : VFPPat<(i32 (fp_to_sint (f64 DPR:$a))),
> + (COPY_TO_REGCLASS (VTOSIZD DPR:$a), GPR)>;
> +
> def VTOSIZS : AVConv1In<0b11101, 0b11, 0b1101, 0b1010,
> (outs SPR:$dst), (ins SPR:$a),
> IIC_fpCVTSI, "vcvt", ".s32.f32\t$dst, $a",
> - [(set SPR:$dst, (arm_ftosi SPR:$a))]> {
> + [/* For disassembly only; pattern left blank */]> {
> let Inst{7} = 1; // Z bit
> }
>
> +def : VFPPat<(i32 (fp_to_sint SPR:$a)),
> + (COPY_TO_REGCLASS (VTOSIZS SPR:$a), GPR)>,
> + Requires<[DontUseNEONForFP, HasVFP2]>;
> +
> def VTOUIZD : AVConv1I<0b11101, 0b11, 0b1100, 0b1011,
> (outs SPR:$dst), (ins DPR:$a),
> IIC_fpCVTDI, "vcvt", ".u32.f64\t$dst, $a",
> - [(set SPR:$dst, (arm_ftoui (f64 DPR:$a)))]> {
> + [/* For disassembly only; pattern left blank */]> {
> let Inst{7} = 1; // Z bit
> }
>
> +def : VFPPat<(i32 (fp_to_uint (f64 DPR:$a))),
> + (COPY_TO_REGCLASS (VTOUIZD DPR:$a), GPR)>;
> +
> def VTOUIZS : AVConv1In<0b11101, 0b11, 0b1100, 0b1010,
> (outs SPR:$dst), (ins SPR:$a),
> IIC_fpCVTSI, "vcvt", ".u32.f32\t$dst, $a",
> - [(set SPR:$dst, (arm_ftoui SPR:$a))]> {
> + [/* For disassembly only; pattern left blank */]> {
> let Inst{7} = 1; // Z bit
> }
>
> +def : VFPPat<(i32 (fp_to_uint SPR:$a)),
> + (COPY_TO_REGCLASS (VTOUIZS SPR:$a), GPR)>,
> + Requires<[DontUseNEONForFP, HasVFP2]>;
> +
> // And the Z bit '0' variants, i.e. use the rounding mode specified by FPSCR.
> // For disassembly only.
>
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
More information about the llvm-commits mailing list