[PATCH] R600: Factor i64 UDIVREM lowering into its own function
Jan Vesely
jan.vesely at rutgers.edu
Thu Oct 16 18:52:22 PDT 2014
On Thu, 2014-10-16 at 20:09 -0400, Tom Stellard wrote:
> This is so it could potentially be used by SI. However, the current
> implementation does not always produce correct results, so the
> IntegerDivisionPass is being used instead.
LGTM
> ---
> lib/Target/R600/AMDGPUISelLowering.cpp | 81 ++++++++++++++++++++++++++++++++++
> lib/Target/R600/AMDGPUISelLowering.h | 2 +
> lib/Target/R600/R600ISelLowering.cpp | 69 +----------------------------
> 3 files changed, 84 insertions(+), 68 deletions(-)
>
> diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
> index bac4ca0..63d3ef8 100644
> --- a/lib/Target/R600/AMDGPUISelLowering.cpp
> +++ b/lib/Target/R600/AMDGPUISelLowering.cpp
> @@ -1502,11 +1502,92 @@ SDValue AMDGPUTargetLowering::LowerDIVREM24(SDValue Op, SelectionDAG &DAG, bool
> return DAG.getMergeValues(Res, DL);
> }
>
> +void AMDGPUTargetLowering::LowerUDIVREM64(SDValue Op,
> + SelectionDAG &DAG,
> + SmallVectorImpl<SDValue> &Results) const {
> + assert(Op.getValueType() == MVT::i64);
> +
> + SDLoc DL(Op);
> + EVT VT = Op.getValueType();
> + EVT HalfVT = VT.getHalfSizedIntegerVT(*DAG.getContext());
> +
> + SDValue one = DAG.getConstant(1, HalfVT);
> + SDValue zero = DAG.getConstant(0, HalfVT);
> +
> + //HiLo split
> + SDValue LHS = Op.getOperand(0);
> + SDValue LHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, zero);
> + SDValue LHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, one);
> +
> + SDValue RHS = Op.getOperand(1);
> + SDValue RHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, zero);
> + SDValue RHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, one);
> +
> + // Get Speculative values
> + SDValue DIV_Part = DAG.getNode(ISD::UDIV, DL, HalfVT, LHS_Hi, RHS_Lo);
> + SDValue REM_Part = DAG.getNode(ISD::UREM, DL, HalfVT, LHS_Hi, RHS_Lo);
> +
> + SDValue REM_Hi = zero;
> + SDValue REM_Lo = DAG.getSelectCC(DL, RHS_Hi, zero, REM_Part, LHS_Hi, ISD::SETEQ);
> +
> + SDValue DIV_Hi = DAG.getSelectCC(DL, RHS_Hi, zero, DIV_Part, zero, ISD::SETEQ);
> + SDValue DIV_Lo = zero;
> +
> + const unsigned halfBitWidth = HalfVT.getSizeInBits();
> +
> + for (unsigned i = 0; i < halfBitWidth; ++i) {
> + SDValue POS = DAG.getConstant(halfBitWidth - i - 1, HalfVT);
> + // Get Value of high bit
> + SDValue HBit;
> + if (halfBitWidth == 32 && Subtarget->hasBFE()) {
> + HBit = DAG.getNode(AMDGPUISD::BFE_U32, DL, HalfVT, LHS_Lo, POS, one);
> + } else {
> + HBit = DAG.getNode(ISD::SRL, DL, HalfVT, LHS_Lo, POS);
> + HBit = DAG.getNode(ISD::AND, DL, HalfVT, HBit, one);
> + }
> +
> + SDValue Carry = DAG.getNode(ISD::SRL, DL, HalfVT, REM_Lo,
> + DAG.getConstant(halfBitWidth - 1, HalfVT));
> + REM_Hi = DAG.getNode(ISD::SHL, DL, HalfVT, REM_Hi, one);
> + REM_Hi = DAG.getNode(ISD::OR, DL, HalfVT, REM_Hi, Carry);
> +
> + REM_Lo = DAG.getNode(ISD::SHL, DL, HalfVT, REM_Lo, one);
> + REM_Lo = DAG.getNode(ISD::OR, DL, HalfVT, REM_Lo, HBit);
> +
> +
> + SDValue REM = DAG.getNode(ISD::BUILD_PAIR, DL, VT, REM_Lo, REM_Hi);
> +
> + SDValue BIT = DAG.getConstant(1 << (halfBitWidth - i - 1), HalfVT);
> + SDValue realBIT = DAG.getSelectCC(DL, REM, RHS, BIT, zero, ISD::SETGE);
> +
> + DIV_Lo = DAG.getNode(ISD::OR, DL, HalfVT, DIV_Lo, realBIT);
> +
> + // Update REM
> +
> + SDValue REM_sub = DAG.getNode(ISD::SUB, DL, VT, REM, RHS);
> +
> + REM = DAG.getSelectCC(DL, REM, RHS, REM_sub, REM, ISD::SETGE);
> + REM_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, REM, zero);
> + REM_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, REM, one);
> + }
> +
> + SDValue REM = DAG.getNode(ISD::BUILD_PAIR, DL, VT, REM_Lo, REM_Hi);
> + SDValue DIV = DAG.getNode(ISD::BUILD_PAIR, DL, VT, DIV_Lo, DIV_Hi);
> + Results.push_back(DIV);
> + Results.push_back(REM);
> +}
> +
> SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
> SelectionDAG &DAG) const {
> SDLoc DL(Op);
> EVT VT = Op.getValueType();
>
> + if (VT == MVT::i64) {
> + SmallVector<SDValue, 2> Results;
> + LowerUDIVREM64(Op, DAG, Results);
> + return DAG.getMergeValues(Results, DL);
> + }
> +
> SDValue Num = Op.getOperand(0);
> SDValue Den = Op.getOperand(1);
>
> diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h
> index 05068a5..c7078db 100644
> --- a/lib/Target/R600/AMDGPUISelLowering.h
> +++ b/lib/Target/R600/AMDGPUISelLowering.h
> @@ -90,6 +90,8 @@ protected:
> SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
> SDValue LowerSDIVREM(SDValue Op, SelectionDAG &DAG) const;
> SDValue LowerDIVREM24(SDValue Op, SelectionDAG &DAG, bool sign) const;
> + void LowerUDIVREM64(SDValue Op, SelectionDAG &DAG,
> + SmallVectorImpl<SDValue> &Results) const;
> bool isHWTrueValue(SDValue Op) const;
> bool isHWFalseValue(SDValue Op) const;
>
> diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp
> index dfc0eb1..a214e53 100644
> --- a/lib/Target/R600/R600ISelLowering.cpp
> +++ b/lib/Target/R600/R600ISelLowering.cpp
> @@ -907,74 +907,7 @@ void R600TargetLowering::ReplaceNodeResults(SDNode *N,
> }
> case ISD::UDIVREM: {
> SDValue Op = SDValue(N, 0);
> - SDLoc DL(Op);
> - EVT VT = Op.getValueType();
> - EVT HalfVT = VT.getHalfSizedIntegerVT(*DAG.getContext());
> -
> - SDValue one = DAG.getConstant(1, HalfVT);
> - SDValue zero = DAG.getConstant(0, HalfVT);
> -
> - //HiLo split
> - SDValue LHS = N->getOperand(0);
> - SDValue LHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, zero);
> - SDValue LHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, one);
> -
> - SDValue RHS = N->getOperand(1);
> - SDValue RHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, zero);
> - SDValue RHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, one);
> -
> - // Get Speculative values
> - SDValue DIV_Part = DAG.getNode(ISD::UDIV, DL, HalfVT, LHS_Hi, RHS_Lo);
> - SDValue REM_Part = DAG.getNode(ISD::UREM, DL, HalfVT, LHS_Hi, RHS_Lo);
> -
> - SDValue REM_Hi = zero;
> - SDValue REM_Lo = DAG.getSelectCC(DL, RHS_Hi, zero, REM_Part, LHS_Hi, ISD::SETEQ);
> -
> - SDValue DIV_Hi = DAG.getSelectCC(DL, RHS_Hi, zero, DIV_Part, zero, ISD::SETEQ);
> - SDValue DIV_Lo = zero;
> -
> - const unsigned halfBitWidth = HalfVT.getSizeInBits();
> -
> - for (unsigned i = 0; i < halfBitWidth; ++i) {
> - SDValue POS = DAG.getConstant(halfBitWidth - i - 1, HalfVT);
> - // Get Value of high bit
> - SDValue HBit;
> - if (halfBitWidth == 32 && Subtarget->hasBFE()) {
> - HBit = DAG.getNode(AMDGPUISD::BFE_U32, DL, HalfVT, LHS_Lo, POS, one);
> - } else {
> - HBit = DAG.getNode(ISD::SRL, DL, HalfVT, LHS_Lo, POS);
> - HBit = DAG.getNode(ISD::AND, DL, HalfVT, HBit, one);
> - }
> -
> - SDValue Carry = DAG.getNode(ISD::SRL, DL, HalfVT, REM_Lo,
> - DAG.getConstant(halfBitWidth - 1, HalfVT));
> - REM_Hi = DAG.getNode(ISD::SHL, DL, HalfVT, REM_Hi, one);
> - REM_Hi = DAG.getNode(ISD::OR, DL, HalfVT, REM_Hi, Carry);
> -
> - REM_Lo = DAG.getNode(ISD::SHL, DL, HalfVT, REM_Lo, one);
> - REM_Lo = DAG.getNode(ISD::OR, DL, HalfVT, REM_Lo, HBit);
> -
> -
> - SDValue REM = DAG.getNode(ISD::BUILD_PAIR, DL, VT, REM_Lo, REM_Hi);
> -
> - SDValue BIT = DAG.getConstant(1 << (halfBitWidth - i - 1), HalfVT);
> - SDValue realBIT = DAG.getSelectCC(DL, REM, RHS, BIT, zero, ISD::SETGE);
> -
> - DIV_Lo = DAG.getNode(ISD::OR, DL, HalfVT, DIV_Lo, realBIT);
> -
> - // Update REM
> -
> - SDValue REM_sub = DAG.getNode(ISD::SUB, DL, VT, REM, RHS);
> -
> - REM = DAG.getSelectCC(DL, REM, RHS, REM_sub, REM, ISD::SETGE);
> - REM_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, REM, zero);
> - REM_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, REM, one);
> - }
> -
> - SDValue REM = DAG.getNode(ISD::BUILD_PAIR, DL, VT, REM_Lo, REM_Hi);
> - SDValue DIV = DAG.getNode(ISD::BUILD_PAIR, DL, VT, DIV_Lo, DIV_Hi);
> - Results.push_back(DIV);
> - Results.push_back(REM);
> + LowerUDIVREM64(Op, DAG, Results);
> break;
> }
> }
--
Jan Vesely <jan.vesely at rutgers.edu>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 819 bytes
Desc: This is a digitally signed message part
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20141016/dec37cb6/attachment.sig>
More information about the llvm-commits
mailing list