[PATCH v2 3/3] R600: Try to use lower types for 64bit division if possible

Matt Arsenault Matthew.Arsenault at amd.com
Mon Nov 17 11:42:31 PST 2014


On 11/15/2014 02:51 PM, Jan Vesely wrote:
> v2: rebase
>
> Signed-off-by: Jan Vesely <jan.vesely at rutgers.edu>
> ---
>
>   The 2464 parts reuire patch 2/3
>
>   lib/Target/R600/AMDGPUISelLowering.cpp |  50 +++++++++---
>   lib/Target/R600/AMDGPUISelLowering.h   |   2 +-
>   test/CodeGen/R600/sdivrem64.ll         | 136 +++++++++++++++++++++++++++++++++
>   test/CodeGen/R600/udivrem64.ll         |  58 +++++++++++++-
>   4 files changed, 231 insertions(+), 15 deletions(-)
>   create mode 100644 test/CodeGen/R600/sdivrem64.ll
>
> diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
> index f73f70e..52f1186 100644
> --- a/lib/Target/R600/AMDGPUISelLowering.cpp
> +++ b/lib/Target/R600/AMDGPUISelLowering.cpp
> @@ -1591,6 +1591,20 @@ void AMDGPUTargetLowering::LowerUDIVREM64(SDValue Op,
>     SDValue RHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, zero);
>     SDValue RHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, one);
>   
> +  if (VT == MVT::i64 &&
> +    DAG.MaskedValueIsZero(RHS, APInt::getHighBitsSet(64, 32)) &&
> +    DAG.MaskedValueIsZero(LHS, APInt::getHighBitsSet(64, 32))) {
> +
> +    SDValue Res = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(HalfVT, HalfVT),
> +                              LHS_Lo, RHS_Lo);
> +
> +    SDValue DIV = DAG.getNode(ISD::BUILD_PAIR, DL, VT, Res.getValue(0), zero);
> +    SDValue REM = DAG.getNode(ISD::BUILD_PAIR, DL, VT, Res.getValue(1), zero);
> +    Results.push_back(DIV);
> +    Results.push_back(REM);
> +    return;
> +  }
> +
>     // Get Speculative values
>     SDValue DIV_Part = DAG.getNode(ISD::UDIV, DL, HalfVT, LHS_Hi, RHS_Lo);
>     SDValue REM_Part = DAG.getNode(ISD::UREM, DL, HalfVT, LHS_Hi, RHS_Lo);
> @@ -1652,8 +1666,8 @@ SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
>     SDValue Den = Op.getOperand(1);
>   
>     if (VT == MVT::i32) {
> -    if (DAG.MaskedValueIsZero(Op.getOperand(0), APInt(32, 0xff << 24)) &&
> -        DAG.MaskedValueIsZero(Op.getOperand(1), APInt(32, 0xff << 24))) {
> +    if (DAG.MaskedValueIsZero(Num, APInt::getHighBitsSet(32, 8)) &&
> +        DAG.MaskedValueIsZero(Den, APInt::getHighBitsSet(32, 8))) {
>         // TODO: We technically could do this for i64, but shouldn't that just be
>         // handled by something generally reducing 64-bit division on 32-bit
>         // values to 32-bit?
> @@ -1765,19 +1779,31 @@ SDValue AMDGPUTargetLowering::LowerSDIVREM(SDValue Op,
>     SDValue LHS = Op.getOperand(0);
>     SDValue RHS = Op.getOperand(1);
>   
> -  if (VT == MVT::i32) {
> -    if (DAG.ComputeNumSignBits(Op.getOperand(0)) > 8 &&
> -        DAG.ComputeNumSignBits(Op.getOperand(1)) > 8) {
> -      // TODO: We technically could do this for i64, but shouldn't that just be
> -      // handled by something generally reducing 64-bit division on 32-bit
> -      // values to 32-bit?
> -      return LowerDIVREM24(Op, DAG, true);
> -    }
> -  }
> -
>     SDValue Zero = DAG.getConstant(0, VT);
>     SDValue NegOne = DAG.getConstant(-1, VT);
>   
> +  if (VT == MVT::i32 &&
> +      DAG.ComputeNumSignBits(LHS) > 8 &&
> +      DAG.ComputeNumSignBits(RHS) > 8) {
> +    return LowerDIVREM24(Op, DAG, true);
> +  }
> +  if (VT == MVT::i64 &&
> +      DAG.ComputeNumSignBits(LHS) > 32 &&
> +      DAG.ComputeNumSignBits(RHS) > 32) {
> +    EVT HalfVT = VT.getHalfSizedIntegerVT(*DAG.getContext());
> +
> +    //HiLo split
> +    SDValue LHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, Zero);
> +    SDValue RHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, Zero);
> +    SDValue DIVREM = DAG.getNode(ISD::SDIVREM, DL, DAG.getVTList(HalfVT, HalfVT),
> +                                 LHS_Lo, RHS_Lo);
> +    SDValue Res[2] = {
> +      DAG.getNode(ISD::SIGN_EXTEND, DL, VT, DIVREM.getValue(0)),
> +      DAG.getNode(ISD::SIGN_EXTEND, DL, VT, DIVREM.getValue(1))
> +    };
> +    return DAG.getMergeValues(Res, DL);
> +  }
> +
>     SDValue LHSign = DAG.getSelectCC(DL, LHS, Zero, NegOne, Zero, ISD::SETLT);
>     SDValue RHSign = DAG.getSelectCC(DL, RHS, Zero, NegOne, Zero, ISD::SETLT);
>     SDValue DSign = DAG.getNode(ISD::XOR, DL, VT, LHSign, RHSign);
> diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h
> index 8f2f917..3ce2257 100644
> --- a/lib/Target/R600/AMDGPUISelLowering.h
> +++ b/lib/Target/R600/AMDGPUISelLowering.h
> @@ -43,7 +43,6 @@ private:
>     /// \brief Split a vector store into multiple scalar stores.
>     /// \returns The resulting chain.
>   
> -  SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
>     SDValue LowerFREM(SDValue Op, SelectionDAG &DAG) const;
>     SDValue LowerFCEIL(SDValue Op, SelectionDAG &DAG) const;
>     SDValue LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const;
> @@ -86,6 +85,7 @@ protected:
>     SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
>     SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
>     SDValue LowerSDIVREM(SDValue Op, SelectionDAG &DAG) const;
> +  SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
>     SDValue LowerDIVREM24(SDValue Op, SelectionDAG &DAG, bool sign) const;
>     void LowerUDIVREM64(SDValue Op, SelectionDAG &DAG,
>                                       SmallVectorImpl<SDValue> &Results) const;
> diff --git a/test/CodeGen/R600/sdivrem64.ll b/test/CodeGen/R600/sdivrem64.ll
> new file mode 100644
> index 0000000..cf871fc
> --- /dev/null
> +++ b/test/CodeGen/R600/sdivrem64.ll
> @@ -0,0 +1,136 @@
> +;XUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck --check-prefix=SI --check-prefix=FUNC %s
> +;RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=EG --check-prefix=FUNC %s
Is the broken SI run line intentional?

> +
> +;FUNC-LABEL: {{^}}test_sdiv:
> +;EG: RECIP_UINT
> +;EG: LSHL {{.*}}, 1,
> +;EG: BFE_UINT
> +;EG: BFE_UINT
> +;EG: BFE_UINT
> +;EG: BFE_UINT
> +;EG: BFE_UINT
> +;EG: BFE_UINT
> +;EG: BFE_UINT
> +;EG: BFE_UINT
> +;EG: BFE_UINT
> +;EG: BFE_UINT
> +;EG: BFE_UINT
> +;EG: BFE_UINT
> +;EG: BFE_UINT
> +;EG: BFE_UINT
> +;EG: BFE_UINT
> +;EG: BFE_UINT
> +;EG: BFE_UINT
> +;EG: BFE_UINT
> +;EG: BFE_UINT
> +;EG: BFE_UINT
> +;EG: BFE_UINT
> +;EG: BFE_UINT
> +;EG: BFE_UINT
> +;EG: BFE_UINT
> +;EG: BFE_UINT
> +;EG: BFE_UINT
> +;EG: BFE_UINT
> +;EG: BFE_UINT
> +;EG: BFE_UINT
> +;EG: BFE_UINT
> +;SI: S_ENDPGM
> +define void @test_sdiv(i64 addrspace(1)* %out, i64 %x, i64 %y) {
> +  %result = sdiv i64 %x, %y
> +  store i64 %result, i64 addrspace(1)* %out
> +  ret void
> +}
> +
> +;FUNC-LABEL: {{^}}test_srem:
> +;EG: RECIP_UINT
> +;EG: BFE_UINT
> +;EG: BFE_UINT
> +;EG: BFE_UINT
> +;EG: BFE_UINT
> +;EG: BFE_UINT
> +;EG: BFE_UINT
> +;EG: BFE_UINT
> +;EG: BFE_UINT
> +;EG: BFE_UINT
> +;EG: BFE_UINT
> +;EG: BFE_UINT
> +;EG: BFE_UINT
> +;EG: BFE_UINT
> +;EG: BFE_UINT
> +;EG: BFE_UINT
> +;EG: BFE_UINT
> +;EG: BFE_UINT
> +;EG: BFE_UINT
> +;EG: BFE_UINT
> +;EG: BFE_UINT
> +;EG: BFE_UINT
> +;EG: BFE_UINT
> +;EG: BFE_UINT
> +;EG: BFE_UINT
> +;EG: BFE_UINT
> +;EG: BFE_UINT
> +;EG: BFE_UINT
> +;EG: BFE_UINT
> +;EG: BFE_UINT
> +;EG: BFE_UINT
> +;EG: AND_INT {{.*}}, 1,
> +;SI: S_ENDPGM
> +define void @test_srem(i64 addrspace(1)* %out, i64 %x, i64 %y) {
> +  %result = urem i64 %x, %y
> +  store i64 %result, i64 addrspace(1)* %out
> +  ret void
> +}
> +
> +;FUNC-LABEL: {{^}}test_sdiv3264:
> +;EG: RECIP_UINT
> +;EG-NOT: BFE_UINT
> +;SI: S_ENDPGM
> +define void @test_sdiv3264(i64 addrspace(1)* %out, i64 %x, i64 %y) {
> +  %1 = ashr i64 %x, 33
> +  %2 = ashr i64 %y, 33
> +  %result = sdiv i64 %1, %2
> +  store i64 %result, i64 addrspace(1)* %out
> +  ret void
> +}
> +
> +;FUNC-LABEL: {{^}}test_srem3264:
> +;EG: RECIP_UINT
> +;EG-NOT: BFE_UINT
> +;SI: S_ENDPGM
> +define void @test_srem3264(i64 addrspace(1)* %out, i64 %x, i64 %y) {
> +  %1 = ashr i64 %x, 33
> +  %2 = ashr i64 %y, 33
> +  %result = srem i64 %1, %2
> +  store i64 %result, i64 addrspace(1)* %out
> +  ret void
> +}
> +
> +;FUNC-LABEL: {{^}}test_sdiv2464:
> +;EG: INT_TO_FLT
> +;EG: INT_TO_FLT
> +;EG: FLT_TO_INT
> +;EG-NOT: RECIP_UINT
> +;EG-NOT: BFE_UINT
> +;SI: S_ENDPGM
> +define void @test_sdiv2464(i64 addrspace(1)* %out, i64 %x, i64 %y) {
> +  %1 = ashr i64 %x, 40
> +  %2 = ashr i64 %y, 40
> +  %result = sdiv i64 %1, %2
> +  store i64 %result, i64 addrspace(1)* %out
> +  ret void
> +}
> +
> +;FUNC-LABEL: {{^}}test_srem2464:
> +;EG: INT_TO_FLT
> +;EG: INT_TO_FLT
> +;EG: FLT_TO_INT
> +;EG-NOT: RECIP_UINT
> +;EG-NOT: BFE_UINT
> +;SI: S_ENDPGM
> +define void @test_srem2464(i64 addrspace(1)* %out, i64 %x, i64 %y) {
> +  %1 = ashr i64 %x, 40
> +  %2 = ashr i64 %y, 40
> +  %result = srem i64 %1, %2
> +  store i64 %result, i64 addrspace(1)* %out
> +  ret void
> +}
> diff --git a/test/CodeGen/R600/udivrem64.ll b/test/CodeGen/R600/udivrem64.ll
> index 8864c83..eefb985 100644
> --- a/test/CodeGen/R600/udivrem64.ll
> +++ b/test/CodeGen/R600/udivrem64.ll
> @@ -1,5 +1,5 @@
> -;XUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs| FileCheck --check-prefix=SI --check-prefix=FUNC %s
> -;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG --check-prefix=FUNC %s
> +;XUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck --check-prefix=SI --check-prefix=FUNC %s
> +;RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=EG --check-prefix=FUNC %s
>   
>   ;FUNC-LABEL: {{^}}test_udiv:
>   ;EG: RECIP_UINT
> @@ -80,3 +80,57 @@ define void @test_urem(i64 addrspace(1)* %out, i64 %x, i64 %y) {
>     store i64 %result, i64 addrspace(1)* %out
>     ret void
>   }
> +
> +;FUNC-LABEL: {{^}}test_udiv3264:
> +;EG: RECIP_UINT
> +;EG-NOT: BFE_UINT
> +;SI: S_ENDPGM
> +define void @test_udiv3264(i64 addrspace(1)* %out, i64 %x, i64 %y) {
> +  %1 = lshr i64 %x, 33
> +  %2 = lshr i64 %y, 33
> +  %result = udiv i64 %1, %2
> +  store i64 %result, i64 addrspace(1)* %out
> +  ret void
> +}
> +
> +;FUNC-LABEL: {{^}}test_urem3264:
> +;EG: RECIP_UINT
> +;EG-NOT: BFE_UINT
> +;SI: S_ENDPGM
> +define void @test_urem3264(i64 addrspace(1)* %out, i64 %x, i64 %y) {
> +  %1 = lshr i64 %x, 33
> +  %2 = lshr i64 %y, 33
> +  %result = urem i64 %1, %2
> +  store i64 %result, i64 addrspace(1)* %out
> +  ret void
> +}
> +
> +;FUNC-LABEL: {{^}}test_udiv2464:
> +;EG: UINT_TO_FLT
> +;EG: UINT_TO_FLT
> +;EG: FLT_TO_UINT
> +;EG-NOT: RECIP_UINT
> +;EG-NOT: BFE_UINT
> +;SI: S_ENDPGM
> +define void @test_udiv2464(i64 addrspace(1)* %out, i64 %x, i64 %y) {
> +  %1 = lshr i64 %x, 40
> +  %2 = lshr i64 %y, 40
> +  %result = udiv i64 %1, %2
> +  store i64 %result, i64 addrspace(1)* %out
> +  ret void
> +}
> +
> +;FUNC-LABEL: {{^}}test_urem2464:
> +;EG: UINT_TO_FLT
> +;EG: UINT_TO_FLT
> +;EG: FLT_TO_UINT
> +;EG-NOT: RECIP_UINT
> +;EG-NOT: BFE_UINT
> +;SI: S_ENDPGM
> +define void @test_urem2464(i64 addrspace(1)* %out, i64 %x, i64 %y) {
> +  %1 = lshr i64 %x, 40
> +  %2 = lshr i64 %y, 40
> +  %result = urem i64 %1, %2
> +  store i64 %result, i64 addrspace(1)* %out
> +  ret void
> +}




More information about the llvm-commits mailing list