[PATCH 1/3] R600: Implement custom SDIVREM.
Matt Arsenault
arsenm2 at gmail.com
Sat Jun 21 17:59:38 PDT 2014
On Jun 21, 2014, at 4:24 PM, Jan Vesely <jan.vesely at rutgers.edu> wrote:
> Instead of separate SDIV/SREM.
> SDIV used UDIV which in turn used UDIVREM anyway.
> SREM used SDIV(UDIV->UDIVREM)+MUL+SUB, using UDIVREM directly is more efficient.
Would it be possible and / or better to use part of this to implement just SDIV / SREM? I don’t really know how division works
>
> Signed-off-by: Jan Vesely <jan.vesely at rutgers.edu>
> ---
> lib/Target/R600/AMDGPUISelLowering.cpp | 47 +++++++++++++++++++++++++++++++---
> lib/Target/R600/AMDGPUISelLowering.h | 1 +
> 2 files changed, 44 insertions(+), 4 deletions(-)
>
> diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
> index d7ae5e8..d613659 100644
> --- a/lib/Target/R600/AMDGPUISelLowering.cpp
> +++ b/lib/Target/R600/AMDGPUISelLowering.cpp
> @@ -234,10 +234,10 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
> const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
> for (MVT VT : ScalarIntVTs) {
> setOperationAction(ISD::SREM, VT, Expand);
> - setOperationAction(ISD::SDIV, VT, Custom);
> + setOperationAction(ISD::SDIV, VT, Expand);
>
> // GPU does not have divrem function for signed or unsigned.
> - setOperationAction(ISD::SDIVREM, VT, Expand);
> + setOperationAction(ISD::SDIVREM, VT, Custom);
> setOperationAction(ISD::UDIVREM, VT, Custom);
>
> // GPU does not have [S|U]MUL_LOHI functions as a single instruction.
> @@ -297,13 +297,13 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
> setOperationAction(ISD::SINT_TO_FP, VT, Expand);
> setOperationAction(ISD::UINT_TO_FP, VT, Expand);
> // TODO: Implement custom UREM / SREM routines.
> - setOperationAction(ISD::SDIV, VT, Custom);
> + setOperationAction(ISD::SDIV, VT, Expand);
> setOperationAction(ISD::UDIV, VT, Expand);
> setOperationAction(ISD::SREM, VT, Expand);
> setOperationAction(ISD::UREM, VT, Expand);
> setOperationAction(ISD::SMUL_LOHI, VT, Expand);
> setOperationAction(ISD::UMUL_LOHI, VT, Expand);
> - setOperationAction(ISD::SDIVREM, VT, Expand);
> + setOperationAction(ISD::SDIVREM, VT, Custom);
> setOperationAction(ISD::UDIVREM, VT, Custom);
> setOperationAction(ISD::SELECT, VT, Expand);
> setOperationAction(ISD::VSELECT, VT, Expand);
> @@ -510,6 +510,7 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op,
> case ISD::SDIV: return LowerSDIV(Op, DAG);
> case ISD::SREM: return LowerSREM(Op, DAG);
> case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
> + case ISD::SDIVREM: return LowerSDIVREM(Op, DAG);
> case ISD::FCEIL: return LowerFCEIL(Op, DAG);
> case ISD::FTRUNC: return LowerFTRUNC(Op, DAG);
> case ISD::FRINT: return LowerFRINT(Op, DAG);
> @@ -1618,6 +1619,44 @@ SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
> return DAG.getMergeValues(Ops, DL);
> }
>
> +SDValue AMDGPUTargetLowering::LowerSDIVREM(SDValue Op,
> + SelectionDAG &DAG) const {
> + SDLoc DL(Op);
> + EVT VT = Op.getValueType();
> +
> + SDValue ZERO = DAG.getConstant(0, VT);
> + SDValue NEG_ONE = DAG.getConstant(-1, VT);
> +
> + SDValue LHS = Op.getOperand(0);
> + SDValue RHS = Op.getOperand(1);
> +
> + SDValue LHSIGN = DAG.getSelectCC(DL, LHS, ZERO, NEG_ONE, ZERO, ISD::SETLT);
> + SDValue RHSIGN = DAG.getSelectCC(DL, RHS, ZERO, NEG_ONE, ZERO, ISD::SETLT);
> + SDValue DSIGN = DAG.getNode(ISD::XOR, DL, VT, LHSIGN, RHSIGN);
> + SDValue RSIGN = LHSIGN; // Remainder sign is the same as LHS
> +
> + LHS = DAG.getNode(ISD::ADD, DL, VT, LHS, LHSIGN);
> + RHS = DAG.getNode(ISD::ADD, DL, VT, RHS, RHSIGN);
> +
> + LHS = DAG.getNode(ISD::XOR, DL, VT, LHS, LHSIGN);
> + RHS = DAG.getNode(ISD::XOR, DL, VT, RHS, RHSIGN);
> +
> + SDValue DIV = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT), LHS, RHS);
> + SDValue REM = DIV.getValue(1);
> +
> + DIV = DAG.getNode(ISD::XOR, DL, VT, DIV, DSIGN);
> + REM = DAG.getNode(ISD::XOR, DL, VT, REM, RSIGN);
> +
> + DIV = DAG.getNode(ISD::SUB, DL, VT, DIV, DSIGN);
> + REM = DAG.getNode(ISD::SUB, DL, VT, REM, RSIGN);
> +
> + SDValue Res[2] = {
> + DIV,
> + REM
> + };
> + return DAG.getMergeValues(Res, DL);
> +}
> +
Don’t use all caps variable names. Can you add tests? I assume these are already covered, but probably should be checking something.
Other than those things LGTM
More information about the llvm-commits
mailing list