[PATCH 1/3] R600: Implement custom SDIVREM.

Jan Vesely jan.vesely at rutgers.edu
Sat Jun 21 16:24:28 PDT 2014


Instead of separate SDIV/SREM.
SDIV used UDIV which in turn used UDIVREM anyway.
SREM used SDIV(UDIV->UDIVREM)+MUL+SUB, using UDIVREM directly is more efficient.

Signed-off-by: Jan Vesely <jan.vesely at rutgers.edu>
---
 lib/Target/R600/AMDGPUISelLowering.cpp | 47 +++++++++++++++++++++++++++++++---
 lib/Target/R600/AMDGPUISelLowering.h   |  1 +
 2 files changed, 44 insertions(+), 4 deletions(-)

diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
index d7ae5e8..d613659 100644
--- a/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -234,10 +234,10 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
   const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
   for (MVT VT : ScalarIntVTs) {
     setOperationAction(ISD::SREM, VT, Expand);
-    setOperationAction(ISD::SDIV, VT, Custom);
+    setOperationAction(ISD::SDIV, VT, Expand);
 
     // GPU does not have divrem function for signed or unsigned.
-    setOperationAction(ISD::SDIVREM, VT, Expand);
+    setOperationAction(ISD::SDIVREM, VT, Custom);
     setOperationAction(ISD::UDIVREM, VT, Custom);
 
     // GPU does not have [S|U]MUL_LOHI functions as a single instruction.
@@ -297,13 +297,13 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
     setOperationAction(ISD::SINT_TO_FP, VT, Expand);
     setOperationAction(ISD::UINT_TO_FP, VT, Expand);
     // TODO: Implement custom UREM / SREM routines.
-    setOperationAction(ISD::SDIV, VT, Custom);
+    setOperationAction(ISD::SDIV, VT, Expand);
     setOperationAction(ISD::UDIV, VT, Expand);
     setOperationAction(ISD::SREM, VT, Expand);
     setOperationAction(ISD::UREM, VT, Expand);
     setOperationAction(ISD::SMUL_LOHI, VT, Expand);
     setOperationAction(ISD::UMUL_LOHI, VT, Expand);
-    setOperationAction(ISD::SDIVREM, VT, Expand);
+    setOperationAction(ISD::SDIVREM, VT, Custom);
     setOperationAction(ISD::UDIVREM, VT, Custom);
     setOperationAction(ISD::SELECT, VT, Expand);
     setOperationAction(ISD::VSELECT, VT, Expand);
@@ -510,6 +510,7 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op,
   case ISD::SDIV: return LowerSDIV(Op, DAG);
   case ISD::SREM: return LowerSREM(Op, DAG);
   case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
+  case ISD::SDIVREM: return LowerSDIVREM(Op, DAG);
   case ISD::FCEIL: return LowerFCEIL(Op, DAG);
   case ISD::FTRUNC: return LowerFTRUNC(Op, DAG);
   case ISD::FRINT: return LowerFRINT(Op, DAG);
@@ -1618,6 +1619,44 @@ SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
   return DAG.getMergeValues(Ops, DL);
 }
 
+SDValue AMDGPUTargetLowering::LowerSDIVREM(SDValue Op,
+                                           SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  EVT VT = Op.getValueType();
+
+  SDValue ZERO = DAG.getConstant(0, VT);
+  SDValue NEG_ONE = DAG.getConstant(-1, VT);
+
+  SDValue LHS = Op.getOperand(0);
+  SDValue RHS = Op.getOperand(1);
+
+  SDValue LHSIGN = DAG.getSelectCC(DL, LHS, ZERO, NEG_ONE, ZERO, ISD::SETLT);
+  SDValue RHSIGN = DAG.getSelectCC(DL, RHS, ZERO, NEG_ONE, ZERO, ISD::SETLT);
+  SDValue DSIGN = DAG.getNode(ISD::XOR, DL, VT, LHSIGN, RHSIGN);
+  SDValue RSIGN = LHSIGN; // Remainder sign is the same as LHS
+
+  LHS = DAG.getNode(ISD::ADD, DL, VT, LHS, LHSIGN);
+  RHS = DAG.getNode(ISD::ADD, DL, VT, RHS, RHSIGN);
+
+  LHS = DAG.getNode(ISD::XOR, DL, VT, LHS, LHSIGN);
+  RHS = DAG.getNode(ISD::XOR, DL, VT, RHS, RHSIGN);
+
+  SDValue DIV = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT), LHS, RHS);
+  SDValue REM = DIV.getValue(1);
+
+  DIV = DAG.getNode(ISD::XOR, DL, VT, DIV, DSIGN);
+  REM = DAG.getNode(ISD::XOR, DL, VT, REM, RSIGN);
+
+  DIV = DAG.getNode(ISD::SUB, DL, VT, DIV, DSIGN);
+  REM = DAG.getNode(ISD::SUB, DL, VT, REM, RSIGN);
+
+  SDValue Res[2] = {
+    DIV,
+    REM
+  };
+  return DAG.getMergeValues(Res, DL);
+}
+
 SDValue AMDGPUTargetLowering::LowerFCEIL(SDValue Op, SelectionDAG &DAG) const {
   SDLoc SL(Op);
   SDValue Src = Op.getOperand(0);
diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h
index e2000a0..0db89ecb 100644
--- a/lib/Target/R600/AMDGPUISelLowering.h
+++ b/lib/Target/R600/AMDGPUISelLowering.h
@@ -50,6 +50,7 @@ private:
   SDValue LowerSREM(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerSREM32(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerSREM64(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerSDIVREM(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerFCEIL(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const;
-- 
1.9.3




More information about the llvm-commits mailing list