[PATCH 1/2] R600/SI: implement range reduction for sin/cos
Grigori Goronzy
greg at chown.ath.cx
Thu Jul 17 14:50:04 PDT 2014
These instructions can only take a limited input range, and return
the constant value 1 out of range. We should do range reduction to
be able to process arbitrary values. Use a FRACT instruction after
normalization to achieve this.
v2: use DAG lowering instead of intrinsic, adapt test
v3: calculate constant, fold pattern into instruction definition
---
lib/Target/R600/AMDGPUInstrInfo.td | 3 +++
lib/Target/R600/SIISelLowering.cpp | 28 ++++++++++++++++++++++++++++
lib/Target/R600/SIISelLowering.h | 1 +
lib/Target/R600/SIInstructions.td | 18 ++++++------------
test/CodeGen/R600/llvm.sin.ll | 1 +
5 files changed, 39 insertions(+), 12 deletions(-)
diff --git a/lib/Target/R600/AMDGPUInstrInfo.td b/lib/Target/R600/AMDGPUInstrInfo.td
index 934d59d..820f1a8 100644
--- a/lib/Target/R600/AMDGPUInstrInfo.td
+++ b/lib/Target/R600/AMDGPUInstrInfo.td
@@ -34,6 +34,9 @@ def AMDGPUDivScaleOp : SDTypeProfile<2, 3,
// This argument to this node is a dword address.
def AMDGPUdwordaddr : SDNode<"AMDGPUISD::DWORDADDR", SDTIntUnaryOp>;
+def AMDGPUcos : SDNode<"AMDGPUISD::COS_HW", SDTFPUnaryOp>;
+def AMDGPUsin : SDNode<"AMDGPUISD::SIN_HW", SDTFPUnaryOp>;
+
// out = a - floor(a)
def AMDGPUfract : SDNode<"AMDGPUISD::FRACT", SDTFPUnaryOp>;
diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
index b3429b9..9098324 100644
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -80,6 +80,9 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
setOperationAction(ISD::SUBC, MVT::i32, Legal);
setOperationAction(ISD::SUBE, MVT::i32, Legal);
+ setOperationAction(ISD::FSIN, MVT::f32, Custom);
+ setOperationAction(ISD::FCOS, MVT::f32, Custom);
+
// We need to custom lower vector stores from local memory
setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
@@ -637,6 +640,9 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
}
}
+ case ISD::FSIN:
+ case ISD::FCOS:
+ return LowerTrig(Op, DAG);
case ISD::SELECT: return LowerSELECT(Op, DAG);
case ISD::FDIV: return LowerFDIV(Op, DAG);
case ISD::STORE: return LowerSTORE(Op, DAG);
@@ -1116,6 +1122,28 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
return Chain;
}
+SDValue SITargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ SDValue Arg = Op.getOperand(0);
+ SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, SDLoc(Op), VT,
+ DAG.getNode(ISD::FMUL, SDLoc(Op), VT, Arg,
+ DAG.getConstantFP(0.5 / M_PI, MVT::f32)));
+ unsigned TrigNode;
+
+ switch (Op.getOpcode()) {
+ case ISD::FCOS:
+ TrigNode = AMDGPUISD::COS_HW;
+ break;
+ case ISD::FSIN:
+ TrigNode = AMDGPUISD::SIN_HW;
+ break;
+ default:
+ llvm_unreachable("Wrong trig opcode");
+ }
+
+ return DAG.getNode(TrigNode, SDLoc(Op), VT, FractPart);
+}
+
//===----------------------------------------------------------------------===//
// Custom DAG optimizations
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h
index 9e9a0b0..b3343ee 100644
--- a/lib/Target/R600/SIISelLowering.h
+++ b/lib/Target/R600/SIISelLowering.h
@@ -32,6 +32,7 @@ class SITargetLowering : public AMDGPUTargetLowering {
SDValue LowerFDIV64(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFDIV(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerTrig(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
bool foldImm(SDValue &Operand, int32_t &Immediate,
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index a4920db..bd5be32 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -1167,8 +1167,12 @@ defm V_SQRT_F32 : VOP1_32 <0x00000033, "V_SQRT_F32",
defm V_SQRT_F64 : VOP1_64 <0x00000034, "V_SQRT_F64",
[(set f64:$dst, (fsqrt f64:$src0))]
>;
-defm V_SIN_F32 : VOP1_32 <0x00000035, "V_SIN_F32", []>;
-defm V_COS_F32 : VOP1_32 <0x00000036, "V_COS_F32", []>;
+defm V_SIN_F32 : VOP1_32 <0x00000035, "V_SIN_F32",
+ [(set f32:$dst, (AMDGPUsin f32:$src0))]
+>;
+defm V_COS_F32 : VOP1_32 <0x00000036, "V_COS_F32",
+ [(set f32:$dst, (AMDGPUcos f32:$src0))]
+>;
defm V_NOT_B32 : VOP1_32 <0x00000037, "V_NOT_B32", []>;
defm V_BFREV_B32 : VOP1_32 <0x00000038, "V_BFREV_B32", []>;
defm V_FFBH_U32 : VOP1_32 <0x00000039, "V_FFBH_U32", []>;
@@ -2343,16 +2347,6 @@ def : Pat<
>;
def : Pat <
- (fcos f32:$src0),
- (V_COS_F32_e32 (V_MUL_F32_e32 $src0, (V_MOV_B32_e32 CONST.TWO_PI_INV)))
->;
-
-def : Pat <
- (fsin f32:$src0),
- (V_SIN_F32_e32 (V_MUL_F32_e32 $src0, (V_MOV_B32_e32 CONST.TWO_PI_INV)))
->;
-
-def : Pat <
(int_AMDGPU_cube v4f32:$src),
(INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)),
(V_CUBETC_F32 (EXTRACT_SUBREG $src, sub0),
diff --git a/test/CodeGen/R600/llvm.sin.ll b/test/CodeGen/R600/llvm.sin.ll
index 41c363c..dafbdbc 100644
--- a/test/CodeGen/R600/llvm.sin.ll
+++ b/test/CodeGen/R600/llvm.sin.ll
@@ -8,6 +8,7 @@
;EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
;EG-NOT: SIN
;SI: V_MUL_F32
+;SI: V_FRACT_F32
;SI: V_SIN_F32
;SI-NOT: V_SIN_F32
--
1.8.3.2
More information about the llvm-commits
mailing list