[PATCH] D19076: AMDGPU/SI: Re-implement the lowering for 32-bit floating point division
Changpeng Fang via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 13 14:52:31 PDT 2016
cfang updated this revision to Diff 53627.
cfang added a comment.
New diff posted. This actually implements the lowering of 32-bit floating point division: SITargetLowering::LowerFDIV32.
It affects frem because frem is using fdiv.
http://reviews.llvm.org/D19076
Files:
lib/Target/AMDGPU/SIISelLowering.cpp
test/CodeGen/AMDGPU/frem.ll
Index: test/CodeGen/AMDGPU/frem.ll
===================================================================
--- test/CodeGen/AMDGPU/frem.ll
+++ test/CodeGen/AMDGPU/frem.ll
@@ -5,11 +5,12 @@
; FUNC-LABEL: {{^}}frem_f32:
; GCN-DAG: buffer_load_dword [[X:v[0-9]+]], {{.*$}}
; GCN-DAG: buffer_load_dword [[Y:v[0-9]+]], {{.*}} offset:16
-; GCN-DAG: v_cmp
-; GCN-DAG: v_mul_f32
+; GCN: v_div_scale_f32
; GCN: v_rcp_f32_e32
+; GCN: v_fma_f32
; GCN: v_mul_f32_e32
-; GCN: v_mul_f32_e32
+; GCN: v_div_fmas_f32
+; GCN: v_div_fixup_f32
; GCN: v_trunc_f32_e32
; GCN: v_mad_f32
; GCN: s_endpgm
Index: lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.cpp
+++ lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1840,35 +1840,38 @@
return SDValue();
SDLoc SL(Op);
- SDValue LHS = Op.getOperand(0);
- SDValue RHS = Op.getOperand(1);
- SDValue r1 = DAG.getNode(ISD::FABS, SL, MVT::f32, RHS);
+ SDValue X = Op.getOperand(0);
+ SDValue Y = Op.getOperand(1);
+
+ const SDValue One = DAG.getConstantFP(1.0, SL, MVT::f32);
- const APFloat K0Val(BitsToFloat(0x6f800000));
- const SDValue K0 = DAG.getConstantFP(K0Val, SL, MVT::f32);
+ SDVTList ScaleVT = DAG.getVTList(MVT::f32, MVT::i1);
- const APFloat K1Val(BitsToFloat(0x2f800000));
- const SDValue K1 = DAG.getConstantFP(K1Val, SL, MVT::f32);
+ SDValue d_scaled = DAG.getNode(AMDGPUISD::DIV_SCALE, SL, ScaleVT, Y, Y, X);
+ SDValue n_scaled = DAG.getNode(AMDGPUISD::DIV_SCALE, SL, ScaleVT, X, Y, X);
- const SDValue One = DAG.getConstantFP(1.0, SL, MVT::f32);
+ SDValue approx_rcp = DAG.getNode(AMDGPUISD::RCP, SL, MVT::f32, d_scaled);
- EVT SetCCVT =
- getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::f32);
+ SDValue NegDivScale0 = DAG.getNode(ISD::FNEG, SL, MVT::f32, d_scaled);
- SDValue r2 = DAG.getSetCC(SL, SetCCVT, r1, K0, ISD::SETOGT);
+ SDValue Fma0 = DAG.getNode(ISD::FMA, SL, MVT::f32, NegDivScale0, approx_rcp, One);
- SDValue r3 = DAG.getNode(ISD::SELECT, SL, MVT::f32, r2, K1, One);
+ SDValue Fma1 = DAG.getNode(ISD::FMA, SL, MVT::f32, Fma0, approx_rcp, approx_rcp);
- // TODO: Should this propagate fast-math-flags?
+ SDValue Mul = DAG.getNode(ISD::FMUL, SL, MVT::f32, n_scaled, Fma1);
+
+ SDValue Fma2 = DAG.getNode(ISD::FMA, SL, MVT::f32, NegDivScale0, Mul, n_scaled);
+
+ SDValue Fma3 = DAG.getNode(ISD::FMA, SL, MVT::f32, Fma2, Fma1, Mul);
- r1 = DAG.getNode(ISD::FMUL, SL, MVT::f32, RHS, r3);
+ SDValue Fma4 = DAG.getNode(ISD::FMA, SL, MVT::f32, NegDivScale0, Fma3, n_scaled);
- SDValue r0 = DAG.getNode(AMDGPUISD::RCP, SL, MVT::f32, r1);
+ SDValue Scale = n_scaled.getValue(1);
- SDValue Mul = DAG.getNode(ISD::FMUL, SL, MVT::f32, LHS, r0);
+ SDValue Fmas = DAG.getNode(AMDGPUISD::DIV_FMAS, SL, MVT::f32, Fma4, Fma1, Fma3, Scale);
- return DAG.getNode(ISD::FMUL, SL, MVT::f32, r3, Mul);
+ return DAG.getNode(AMDGPUISD::DIV_FIXUP, SL, MVT::f32, Fmas, Y, X);
}
SDValue SITargetLowering::LowerFDIV64(SDValue Op, SelectionDAG &DAG) const {
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D19076.53627.patch
Type: text/x-patch
Size: 3064 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20160413/9cc18769/attachment.bin>
More information about the llvm-commits
mailing list