[PATCH 1/3] R600: Implement custom SDIVREM.

Matt Arsenault arsenm2 at gmail.com
Sat Jun 21 17:59:38 PDT 2014


On Jun 21, 2014, at 4:24 PM, Jan Vesely <jan.vesely at rutgers.edu> wrote:

> Instead of separate SDIV/SREM.
> SDIV used UDIV which in turn used UDIVREM anyway.
> SREM used SDIV(UDIV->UDIVREM)+MUL+SUB, using UDIVREM directly is more efficient.

Would it be possible and / or better to use part of this to implement just SDIV / SREM? I don’t really know how division works


> 
> Signed-off-by: Jan Vesely <jan.vesely at rutgers.edu>
> ---
> lib/Target/R600/AMDGPUISelLowering.cpp | 47 +++++++++++++++++++++++++++++++---
> lib/Target/R600/AMDGPUISelLowering.h   |  1 +
> 2 files changed, 44 insertions(+), 4 deletions(-)
> 
> diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
> index d7ae5e8..d613659 100644
> --- a/lib/Target/R600/AMDGPUISelLowering.cpp
> +++ b/lib/Target/R600/AMDGPUISelLowering.cpp
> @@ -234,10 +234,10 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
>   const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
>   for (MVT VT : ScalarIntVTs) {
>     setOperationAction(ISD::SREM, VT, Expand);
> -    setOperationAction(ISD::SDIV, VT, Custom);
> +    setOperationAction(ISD::SDIV, VT, Expand);
> 
>     // GPU does not have divrem function for signed or unsigned.
> -    setOperationAction(ISD::SDIVREM, VT, Expand);
> +    setOperationAction(ISD::SDIVREM, VT, Custom);
>     setOperationAction(ISD::UDIVREM, VT, Custom);
> 
>     // GPU does not have [S|U]MUL_LOHI functions as a single instruction.
> @@ -297,13 +297,13 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
>     setOperationAction(ISD::SINT_TO_FP, VT, Expand);
>     setOperationAction(ISD::UINT_TO_FP, VT, Expand);
>     // TODO: Implement custom UREM / SREM routines.
> -    setOperationAction(ISD::SDIV, VT, Custom);
> +    setOperationAction(ISD::SDIV, VT, Expand);
>     setOperationAction(ISD::UDIV, VT, Expand);
>     setOperationAction(ISD::SREM, VT, Expand);
>     setOperationAction(ISD::UREM, VT, Expand);
>     setOperationAction(ISD::SMUL_LOHI, VT, Expand);
>     setOperationAction(ISD::UMUL_LOHI, VT, Expand);
> -    setOperationAction(ISD::SDIVREM, VT, Expand);
> +    setOperationAction(ISD::SDIVREM, VT, Custom);
>     setOperationAction(ISD::UDIVREM, VT, Custom);
>     setOperationAction(ISD::SELECT, VT, Expand);
>     setOperationAction(ISD::VSELECT, VT, Expand);
> @@ -510,6 +510,7 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op,
>   case ISD::SDIV: return LowerSDIV(Op, DAG);
>   case ISD::SREM: return LowerSREM(Op, DAG);
>   case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
> +  case ISD::SDIVREM: return LowerSDIVREM(Op, DAG);
>   case ISD::FCEIL: return LowerFCEIL(Op, DAG);
>   case ISD::FTRUNC: return LowerFTRUNC(Op, DAG);
>   case ISD::FRINT: return LowerFRINT(Op, DAG);
> @@ -1618,6 +1619,44 @@ SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
>   return DAG.getMergeValues(Ops, DL);
> }
> 
> +SDValue AMDGPUTargetLowering::LowerSDIVREM(SDValue Op,
> +                                           SelectionDAG &DAG) const {
> +  SDLoc DL(Op);
> +  EVT VT = Op.getValueType();
> +
> +  SDValue ZERO = DAG.getConstant(0, VT);
> +  SDValue NEG_ONE = DAG.getConstant(-1, VT);
> +
> +  SDValue LHS = Op.getOperand(0);
> +  SDValue RHS = Op.getOperand(1);
> +
> +  SDValue LHSIGN = DAG.getSelectCC(DL, LHS, ZERO, NEG_ONE, ZERO, ISD::SETLT);
> +  SDValue RHSIGN = DAG.getSelectCC(DL, RHS, ZERO, NEG_ONE, ZERO, ISD::SETLT);
> +  SDValue DSIGN = DAG.getNode(ISD::XOR, DL, VT, LHSIGN, RHSIGN);
> +  SDValue RSIGN = LHSIGN; // Remainder sign is the same as LHS
> +
> +  LHS = DAG.getNode(ISD::ADD, DL, VT, LHS, LHSIGN);
> +  RHS = DAG.getNode(ISD::ADD, DL, VT, RHS, RHSIGN);
> +
> +  LHS = DAG.getNode(ISD::XOR, DL, VT, LHS, LHSIGN);
> +  RHS = DAG.getNode(ISD::XOR, DL, VT, RHS, RHSIGN);
> +
> +  SDValue DIV = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT), LHS, RHS);
> +  SDValue REM = DIV.getValue(1);
> +
> +  DIV = DAG.getNode(ISD::XOR, DL, VT, DIV, DSIGN);
> +  REM = DAG.getNode(ISD::XOR, DL, VT, REM, RSIGN);
> +
> +  DIV = DAG.getNode(ISD::SUB, DL, VT, DIV, DSIGN);
> +  REM = DAG.getNode(ISD::SUB, DL, VT, REM, RSIGN);
> +
> +  SDValue Res[2] = {
> +    DIV,
> +    REM
> +  };
> +  return DAG.getMergeValues(Res, DL);
> +}
> +

Don’t use all caps variable names. Can you add tests? I assume these are already covered, but probably should be checking something.
Other than those things LGTM





More information about the llvm-commits mailing list