[PATCH] R600/SI: implement range reduction for sin/cos

Wed Jun 25 13:14:23 PDT 2014

These instructions can only take a limited input range, and return
the constant value 1 out of range. We should do range reduction to
be able to process arbitrary values. Use a FRACT instruction after
normalization to achieve this.

v2: use DAG lowering instead of intrinsic, adapt test
---
 lib/Target/R600/SIISelLowering.cpp | 28 ++++++++++++++++++++++++++++
 lib/Target/R600/SIISelLowering.h   |  1 +
 lib/Target/R600/SIInstructions.td  |  8 ++++----
 test/CodeGen/R600/llvm.sin.ll      |  1 +
 4 files changed, 34 insertions(+), 4 deletions(-)

diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
index 29e4b98..b543816 100644
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -80,6 +80,9 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
   setOperationAction(ISD::SUBC, MVT::i32, Legal);
   setOperationAction(ISD::SUBE, MVT::i32, Legal);
 
+  setOperationAction(ISD::FSIN, MVT::f32, Custom);
+  setOperationAction(ISD::FCOS, MVT::f32, Custom);
+
   // We need to custom lower vector stores from local memory
   setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
   setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
@@ -617,6 +620,9 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
     }
   }
 
+  case ISD::FSIN:
+  case ISD::FCOS:
+    return LowerTrig(Op, DAG);
   case ISD::SELECT: return LowerSELECT(Op, DAG);
   case ISD::STORE: return LowerSTORE(Op, DAG);
   case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
@@ -992,6 +998,28 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
   return Chain;
 }
 
+SDValue SITargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
+  EVT VT = Op.getValueType();
+  SDValue Arg = Op.getOperand(0);
+  SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, SDLoc(Op), VT,
+        DAG.getNode(ISD::FMUL, SDLoc(Op), VT, Arg,
+          DAG.getConstantFP(0.15915494309, MVT::f32)));
+  unsigned TrigNode;
+
+  switch (Op.getOpcode()) {
+  case ISD::FCOS:
+    TrigNode = AMDGPUISD::COS_HW;
+    break;
+  case ISD::FSIN:
+    TrigNode = AMDGPUISD::SIN_HW;
+    break;
+  default:
+    llvm_unreachable("Wrong trig opcode");
+  }
+
+  return DAG.getNode(TrigNode, SDLoc(Op), VT, FractPart);
+}
+
 //===----------------------------------------------------------------------===//
 // Custom DAG optimizations
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h
index 2f97a9a..10ba8f9 100644
--- a/lib/Target/R600/SIISelLowering.h
+++ b/lib/Target/R600/SIISelLowering.h
@@ -28,6 +28,7 @@ class SITargetLowering : public AMDGPUTargetLowering {
   SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerTrig(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
 
   bool foldImm(SDValue &Operand, int32_t &Immediate,
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index 0b12f60..7fc56a5 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -2253,13 +2253,13 @@ def : Pat<
 >;
 
 def : Pat <
-  (fcos f32:$src0),
-  (V_COS_F32_e32 (V_MUL_F32_e32 $src0, (V_MOV_B32_e32 CONST.TWO_PI_INV)))
+  (COS_HW f32:$src0),
+  (V_COS_F32_e32 $src0)
 >;
 
 def : Pat <
-  (fsin f32:$src0),
-  (V_SIN_F32_e32 (V_MUL_F32_e32 $src0, (V_MOV_B32_e32 CONST.TWO_PI_INV)))
+  (SIN_HW f32:$src0),
+  (V_SIN_F32_e32 $src0)
 >;
 
 def : Pat <
diff --git a/test/CodeGen/R600/llvm.sin.ll b/test/CodeGen/R600/llvm.sin.ll
index 41c363c..dafbdbc 100644
--- a/test/CodeGen/R600/llvm.sin.ll
+++ b/test/CodeGen/R600/llvm.sin.ll
@@ -8,6 +8,7 @@
 ;EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
 ;EG-NOT: SIN
 ;SI: V_MUL_F32
+;SI: V_FRACT_F32
 ;SI: V_SIN_F32
 ;SI-NOT: V_SIN_F32
 
-- 
1.8.3.2