[PATCH] R600/SI: Custom lower f64 sqrt
Tom Stellard
thomas.stellard at amd.com
Fri Mar 6 13:35:42 PST 2015
This is necessary to get the correct precision.
---
lib/Target/R600/SIISelLowering.cpp | 65 ++++++++++++++++++++++++++++++++++++++
lib/Target/R600/SIISelLowering.h | 1 +
test/CodeGen/R600/fsqrt.ll | 5 ++-
test/CodeGen/R600/rsq.ll | 5 ++-
4 files changed, 74 insertions(+), 2 deletions(-)
diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
index 7d794b8..117f8ab 100644
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -81,6 +81,8 @@ SITargetLowering::SITargetLowering(TargetMachine &TM,
setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
+ setOperationAction(ISD::FSQRT, MVT::f64, Custom);
+
// We need to custom lower vector stores from local memory
setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
setOperationAction(ISD::LOAD, MVT::v8i32, Custom);
@@ -703,6 +705,7 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
return LowerTrig(Op, DAG);
case ISD::SELECT: return LowerSELECT(Op, DAG);
case ISD::FDIV: return LowerFDIV(Op, DAG);
+ case ISD::FSQRT: return LowerFSQRT(Op, DAG);
case ISD::STORE: return LowerSTORE(Op, DAG);
case ISD::GlobalAddress: {
MachineFunction &MF = DAG.getMachineFunction();
@@ -1195,6 +1198,68 @@ SDValue SITargetLowering::LowerFDIV(SDValue Op, SelectionDAG &DAG) const {
llvm_unreachable("Unexpected type for fdiv");
}
+SDValue SITargetLowering::LowerFSQRT(SDValue Op, SelectionDAG &DAG) const {
+ assert(Op.getValueType() == MVT::f64);
+ SDLoc SL(Op);
+ // Lowers an f64 FSQRT node into a scaled RSQ seed refined by a chain of FMAs.
+ SDValue S01 = Op.getOperand(0);
+
+ SDValue Zero = DAG.getConstant(0, MVT::i32);
+ // 0x1000000000000000 is the IEEE-754 double 2^-767; smaller inputs get pre-scaled.
+ SDValue V01 = DAG.getConstantFP(BitsToDouble(0x1000000000000000), MVT::f64);
+ // VCC is true when the input is (ordered) less than that threshold.
+ SDValue VCC = DAG.getSetCC(SL, MVT::i1, S01, V01, ISD::SETOLT);
+ // Exponent adjustment: 0x100 (256) for inputs below the threshold, 0 otherwise.
+ SDValue V0 = DAG.getConstant(0x100, MVT::i32);
+
+ V0 = DAG.getSelect(SL, MVT::i32, VCC, V0, Zero);
+ // V01 is reused for the scaled input (presumably S01 * 2^V0 -- confirm LDEXP semantics).
+ V01 = DAG.getNode(AMDGPUISD::LDEXP, SL, MVT::f64, S01, V0);
+ // Seed value; RSQ is presumably a reciprocal square root -- confirm.
+ SDValue V23 = DAG.getNode(AMDGPUISD::RSQ, SL, MVT::f64, V01);
+ // V45 = V01 * V23; if RSQ yields 1/sqrt, this is the working sqrt estimate.
+ SDValue V45 = DAG.getNode(ISD::FMUL, SL, MVT::f64, V01, V23);
+ // V23 is halved in place and from here on tracks 0.5 * (RSQ result).
+ SDValue Half = DAG.getConstantFP(0.5, MVT::f64);
+ V23 = DAG.getNode(ISD::FMUL, SL, MVT::f64, V23, Half);
+ // V67 = 0.5 - V23 * V45, the error term used by both refinements below.
+ SDValue NegV23 = DAG.getNode(ISD::FNEG, SL, MVT::f64, V23);
+ SDValue V67 = DAG.getNode(ISD::FMA, SL, MVT::f64, NegV23, V45, Half);
+ // Refine the sqrt estimate: V45 = V45 + V45 * V67.
+ V45 = DAG.getNode(ISD::FMA, SL, MVT::f64, V45, V67, V45);
+ // V89 = V01 - V45 * V45, the residual of the refined estimate.
+ SDValue NegV45 = DAG.getNode(ISD::FNEG, SL, MVT::f64, V45);
+ SDValue V89 = DAG.getNode(ISD::FMA, SL, MVT::f64, NegV45, V45, V01);
+ // Refine the halved RSQ value the same way: V23 = V23 + V23 * V67.
+ V23 = DAG.getNode(ISD::FMA, SL, MVT::f64, V23, V67, V23);
+ // Correct the sqrt estimate with the residual: V45 = V45 + V89 * V23.
+ V45 = DAG.getNode(ISD::FMA, SL, MVT::f64, V89, V23, V45);
+ // V67 is reused for the residual of the corrected estimate: V01 - V45 * V45.
+ NegV45 = DAG.getNode(ISD::FNEG, SL, MVT::f64, V45);
+ V67 = DAG.getNode(ISD::FMA, SL, MVT::f64, NegV45, V45, V01);
+ // 0xffffff80 is -128 as a 32-bit two's-complement value.
+ SDValue V8 = DAG.getConstant(0xffffff80, MVT::i32);
+ // 0x7ff0000000000000 is the IEEE-754 double bit pattern of +infinity.
+ SDValue V910 = DAG.getConstantFP(BitsToDouble(0x7ff0000000000000), MVT::f64);
+ // Last correction step: V23 is reused for the result, V45 + V67 * V23.
+ V23 = DAG.getNode(ISD::FMA, SL, MVT::f64, V67, V23, V45);
+ // Post-scale exponent: -128 when the input was pre-scaled by 256, else 0.
+ SDValue V4 = DAG.getSelect(SL, MVT::i32, VCC, V8, Zero);
+ // S89 is true when the original input compares equal to +infinity.
+ SDValue S89 = DAG.getSetCC(SL, MVT::i1, S01, V910, ISD::SETOEQ);
+ // VCC is reused: true when the original input compares equal to 0.0.
+ SDValue DZero = DAG.getConstantFP(0.0, MVT::f64);
+ VCC = DAG.getSetCC(SL, MVT::i1, S01, DZero, ISD::SETOEQ);
+
+ V23 = DAG.getNode(AMDGPUISD::LDEXP, SL, MVT::f64, V23, V4);
+
+ VCC = DAG.getNode(ISD::OR, SL, MVT::i1, S89, VCC);
+ // For +infinity or zero inputs, return the scaled input V01 rather than the iterated V23.
+ V01 = DAG.getSelect(SL, MVT::f64, VCC, V01, V23);
+
+ return V01;
+}
+
SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
StoreSDNode *Store = cast<StoreSDNode>(Op);
diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h
index 92f5847..ff04704 100644
--- a/lib/Target/R600/SIISelLowering.h
+++ b/lib/Target/R600/SIISelLowering.h
@@ -36,6 +36,7 @@ class SITargetLowering : public AMDGPUTargetLowering {
SDValue LowerFastFDIV(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFDIV32(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFDIV64(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFSQRT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFDIV(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG, bool Signed) const;
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
diff --git a/test/CodeGen/R600/fsqrt.ll b/test/CodeGen/R600/fsqrt.ll
index 0410134..36274be 100644
--- a/test/CodeGen/R600/fsqrt.ll
+++ b/test/CodeGen/R600/fsqrt.ll
@@ -16,7 +16,10 @@ define void @fsqrt_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
}
; CHECK: {{^}}fsqrt_f64:
-; CHECK: v_sqrt_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
+; CHECK-NOT: v_sqrt_f64
+; CHECK: v_ldexp_f64
+; CHECK: v_rsq_f64
+; CHECK: v_ldexp_f64
define void @fsqrt_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
%r0 = load double, double addrspace(1)* %in
diff --git a/test/CodeGen/R600/rsq.ll b/test/CodeGen/R600/rsq.ll
index b67b800..f9e4e15 100644
--- a/test/CodeGen/R600/rsq.ll
+++ b/test/CodeGen/R600/rsq.ll
@@ -18,7 +18,10 @@ define void @rsq_f32(float addrspace(1)* noalias %out, float addrspace(1)* noali
; SI-LABEL: {{^}}rsq_f64:
; SI-UNSAFE: v_rsq_f64_e32
-; SI-SAFE: v_sqrt_f64_e32
+; SI-SAFE-NOT: v_sqrt_f64
+; SI-SAFE: v_ldexp_f64
+; SI-SAFE: v_rsq_f64
+; SI-SAFE: v_ldexp_f64
; SI: s_endpgm
define void @rsq_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) nounwind {
%val = load double, double addrspace(1)* %in, align 4
--
2.0.4
More information about the llvm-commits
mailing list