[PATCH 3/3] R600: Remove LowerSDIV/SREM functions.
Matt Arsenault
arsenm2 at gmail.com
Sat Jun 21 18:05:30 PDT 2014
On Jun 21, 2014, at 4:24 PM, Jan Vesely <jan.vesely at rutgers.edu> wrote:
> Signed-off-by: Jan Vesely <jan.vesely at rutgers.edu>
> ---
>
> I'm not sure about the SDIV/SREM24 versions, but the comment suggested
> they are unused anyway.
It wouldn’t be hard to get these to work again, it just requires changing the type check with a range check based on computeMaskedBits for the operands. I would rather fix that instead of deleting these (unless these don’t actually end up emitting better code than the normal implementation)
>
> lib/Target/R600/AMDGPUISelLowering.cpp | 247 ---------------------------------
> lib/Target/R600/AMDGPUISelLowering.h | 7 -
> 2 files changed, 254 deletions(-)
>
> diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
> index f473d1d..0dabbdd 100644
> --- a/lib/Target/R600/AMDGPUISelLowering.cpp
> +++ b/lib/Target/R600/AMDGPUISelLowering.cpp
> @@ -296,7 +296,6 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
> setOperationAction(ISD::SUB, VT, Expand);
> setOperationAction(ISD::SINT_TO_FP, VT, Expand);
> setOperationAction(ISD::UINT_TO_FP, VT, Expand);
> - // TODO: Implement custom UREM / SREM routines.
> setOperationAction(ISD::SDIV, VT, Expand);
> setOperationAction(ISD::UDIV, VT, Expand);
> setOperationAction(ISD::SREM, VT, Expand);
> @@ -507,8 +506,6 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op,
> case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG);
> case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
> case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
> - case ISD::SDIV: return LowerSDIV(Op, DAG);
> - case ISD::SREM: return LowerSREM(Op, DAG);
> case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
> case ISD::SDIVREM: return LowerSDIVREM(Op, DAG);
> case ISD::FCEIL: return LowerFCEIL(Op, DAG);
> @@ -1296,250 +1293,6 @@ SDValue AMDGPUTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
> return SDValue();
> }
>
> -SDValue AMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const {
> - SDLoc DL(Op);
> - EVT OVT = Op.getValueType();
> - SDValue LHS = Op.getOperand(0);
> - SDValue RHS = Op.getOperand(1);
> - MVT INTTY;
> - MVT FLTTY;
> - if (!OVT.isVector()) {
> - INTTY = MVT::i32;
> - FLTTY = MVT::f32;
> - } else if (OVT.getVectorNumElements() == 2) {
> - INTTY = MVT::v2i32;
> - FLTTY = MVT::v2f32;
> - } else if (OVT.getVectorNumElements() == 4) {
> - INTTY = MVT::v4i32;
> - FLTTY = MVT::v4f32;
> - }
> - unsigned bitsize = OVT.getScalarType().getSizeInBits();
> - // char|short jq = ia ^ ib;
> - SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);
> -
> - // jq = jq >> (bitsize - 2)
> - jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));
> -
> - // jq = jq | 0x1
> - jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));
> -
> - // jq = (int)jq
> - jq = DAG.getSExtOrTrunc(jq, DL, INTTY);
> -
> - // int ia = (int)LHS;
> - SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);
> -
> - // int ib, (int)RHS;
> - SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);
> -
> - // float fa = (float)ia;
> - SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
> -
> - // float fb = (float)ib;
> - SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
> -
> - // float fq = native_divide(fa, fb);
> - SDValue fq = DAG.getNode(AMDGPUISD::DIV_INF, DL, FLTTY, fa, fb);
> -
> - // fq = trunc(fq);
> - fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
> -
> - // float fqneg = -fq;
> - SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);
> -
> - // float fr = mad(fqneg, fb, fa);
> - SDValue fr = DAG.getNode(ISD::FADD, DL, FLTTY,
> - DAG.getNode(ISD::MUL, DL, FLTTY, fqneg, fb), fa);
> -
> - // int iq = (int)fq;
> - SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
> -
> - // fr = fabs(fr);
> - fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);
> -
> - // fb = fabs(fb);
> - fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);
> -
> - // int cv = fr >= fb;
> - SDValue cv;
> - if (INTTY == MVT::i32) {
> - cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
> - } else {
> - cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
> - }
> - // jq = (cv ? jq : 0);
> - jq = DAG.getNode(ISD::SELECT, DL, OVT, cv, jq,
> - DAG.getConstant(0, OVT));
> - // dst = iq + jq;
> - iq = DAG.getSExtOrTrunc(iq, DL, OVT);
> - iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
> - return iq;
> -}
> -
> -SDValue AMDGPUTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const {
> - SDLoc DL(Op);
> - EVT OVT = Op.getValueType();
> - SDValue LHS = Op.getOperand(0);
> - SDValue RHS = Op.getOperand(1);
> - // The LowerSDIV32 function generates equivalent to the following IL.
> - // mov r0, LHS
> - // mov r1, RHS
> - // ilt r10, r0, 0
> - // ilt r11, r1, 0
> - // iadd r0, r0, r10
> - // iadd r1, r1, r11
> - // ixor r0, r0, r10
> - // ixor r1, r1, r11
> - // udiv r0, r0, r1
> - // ixor r10, r10, r11
> - // iadd r0, r0, r10
> - // ixor DST, r0, r10
> -
> - // mov r0, LHS
> - SDValue r0 = LHS;
> -
> - // mov r1, RHS
> - SDValue r1 = RHS;
> -
> - // ilt r10, r0, 0
> - SDValue r10 = DAG.getSelectCC(DL,
> - r0, DAG.getConstant(0, OVT),
> - DAG.getConstant(-1, OVT),
> - DAG.getConstant(0, OVT),
> - ISD::SETLT);
> -
> - // ilt r11, r1, 0
> - SDValue r11 = DAG.getSelectCC(DL,
> - r1, DAG.getConstant(0, OVT),
> - DAG.getConstant(-1, OVT),
> - DAG.getConstant(0, OVT),
> - ISD::SETLT);
> -
> - // iadd r0, r0, r10
> - r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
> -
> - // iadd r1, r1, r11
> - r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
> -
> - // ixor r0, r0, r10
> - r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
> -
> - // ixor r1, r1, r11
> - r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
> -
> - // udiv r0, r0, r1
> - r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
> -
> - // ixor r10, r10, r11
> - r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);
> -
> - // iadd r0, r0, r10
> - r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
> -
> - // ixor DST, r0, r10
> - SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
> - return DST;
> -}
> -
> -SDValue AMDGPUTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const {
> - return SDValue(Op.getNode(), 0);
> -}
> -
> -SDValue AMDGPUTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const {
> - EVT OVT = Op.getValueType().getScalarType();
> -
> - if (OVT == MVT::i64)
> - return LowerSDIV64(Op, DAG);
> -
> - if (OVT.getScalarType() == MVT::i32)
> - return LowerSDIV32(Op, DAG);
> -
> - if (OVT == MVT::i16 || OVT == MVT::i8) {
> - // FIXME: We should be checking for the masked bits. This isn't reached
> - // because i8 and i16 are not legal types.
> - return LowerSDIV24(Op, DAG);
> - }
> -
> - return SDValue(Op.getNode(), 0);
> -}
> -
> -SDValue AMDGPUTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const {
> - SDLoc DL(Op);
> - EVT OVT = Op.getValueType();
> - SDValue LHS = Op.getOperand(0);
> - SDValue RHS = Op.getOperand(1);
> - // The LowerSREM32 function generates equivalent to the following IL.
> - // mov r0, LHS
> - // mov r1, RHS
> - // ilt r10, r0, 0
> - // ilt r11, r1, 0
> - // iadd r0, r0, r10
> - // iadd r1, r1, r11
> - // ixor r0, r0, r10
> - // ixor r1, r1, r11
> - // udiv r20, r0, r1
> - // umul r20, r20, r1
> - // sub r0, r0, r20
> - // iadd r0, r0, r10
> - // ixor DST, r0, r10
> -
> - // mov r0, LHS
> - SDValue r0 = LHS;
> -
> - // mov r1, RHS
> - SDValue r1 = RHS;
> -
> - // ilt r10, r0, 0
> - SDValue r10 = DAG.getSetCC(DL, OVT, r0, DAG.getConstant(0, OVT), ISD::SETLT);
> -
> - // ilt r11, r1, 0
> - SDValue r11 = DAG.getSetCC(DL, OVT, r1, DAG.getConstant(0, OVT), ISD::SETLT);
> -
> - // iadd r0, r0, r10
> - r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
> -
> - // iadd r1, r1, r11
> - r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
> -
> - // ixor r0, r0, r10
> - r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
> -
> - // ixor r1, r1, r11
> - r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
> -
> - // udiv r20, r0, r1
> - SDValue r20 = DAG.getNode(ISD::UREM, DL, OVT, r0, r1);
> -
> - // umul r20, r20, r1
> - r20 = DAG.getNode(AMDGPUISD::UMUL, DL, OVT, r20, r1);
> -
> - // sub r0, r0, r20
> - r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);
> -
> - // iadd r0, r0, r10
> - r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
> -
> - // ixor DST, r0, r10
> - SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
> - return DST;
> -}
> -
> -SDValue AMDGPUTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const {
> - return SDValue(Op.getNode(), 0);
> -}
> -
> -SDValue AMDGPUTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const {
> - EVT OVT = Op.getValueType();
> -
> - if (OVT.getScalarType() == MVT::i64)
> - return LowerSREM64(Op, DAG);
> -
> - if (OVT.getScalarType() == MVT::i32)
> - return LowerSREM32(Op, DAG);
> -
> - return SDValue(Op.getNode(), 0);
> -}
> -
> SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
> SelectionDAG &DAG) const {
> SDLoc DL(Op);
> diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h
> index 0db89ecb..4d83ddd 100644
> --- a/lib/Target/R600/AMDGPUISelLowering.h
> +++ b/lib/Target/R600/AMDGPUISelLowering.h
> @@ -43,13 +43,6 @@ private:
> /// \brief Split a vector store into multiple scalar stores.
> /// \returns The resulting chain.
>
> - SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) const;
> - SDValue LowerSDIV24(SDValue Op, SelectionDAG &DAG) const;
> - SDValue LowerSDIV32(SDValue Op, SelectionDAG &DAG) const;
> - SDValue LowerSDIV64(SDValue Op, SelectionDAG &DAG) const;
> - SDValue LowerSREM(SDValue Op, SelectionDAG &DAG) const;
> - SDValue LowerSREM32(SDValue Op, SelectionDAG &DAG) const;
> - SDValue LowerSREM64(SDValue Op, SelectionDAG &DAG) const;
> SDValue LowerSDIVREM(SDValue Op, SelectionDAG &DAG) const;
> SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
> SDValue LowerFCEIL(SDValue Op, SelectionDAG &DAG) const;
> --
> 1.9.3
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
More information about the llvm-commits
mailing list