[llvm] r290301 - AMDGPU: Custom lower f16 fdiv
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 21 19:05:41 PST 2016
Author: arsenm
Date: Wed Dec 21 21:05:41 2016
New Revision: 290301
URL: http://llvm.org/viewvc/llvm-project?rev=290301&view=rev
Log:
AMDGPU: Custom lower f16 fdiv
Modified:
llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h
llvm/trunk/test/CodeGen/AMDGPU/fdiv.f16.ll
Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=290301&r1=290300&r2=290301&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Wed Dec 21 21:05:41 2016
@@ -299,7 +299,7 @@ SITargetLowering::SITargetLowering(const
setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
- setOperationAction(ISD::FDIV, MVT::f16, Promote);
+ setOperationAction(ISD::FDIV, MVT::f16, Custom);
// F16 - VOP3 Actions.
setOperationAction(ISD::FMA, MVT::f16, Legal);
@@ -3008,6 +3008,23 @@ static SDValue getFPTernOp(SelectionDAG
GlueChain.getValue(2));
}
+SDValue SITargetLowering::LowerFDIV16(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc SL(Op);
+ SDValue Src0 = Op.getOperand(0);
+ SDValue Src1 = Op.getOperand(1);
+
+ SDValue CvtSrc0 = DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, Src0);
+ SDValue CvtSrc1 = DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, Src1);
+
+ SDValue RcpSrc1 = DAG.getNode(AMDGPUISD::RCP, SL, MVT::f32, CvtSrc1);
+ SDValue Quot = DAG.getNode(ISD::FMUL, SL, MVT::f32, CvtSrc0, RcpSrc1);
+
+ SDValue FPRoundFlag = DAG.getTargetConstant(0, SL, MVT::i32);
+ SDValue BestQuot = DAG.getNode(ISD::FP_ROUND, SL, MVT::f16, Quot, FPRoundFlag);
+
+ return DAG.getNode(AMDGPUISD::DIV_FIXUP, SL, MVT::f16, BestQuot, Src1, Src0);
+}
+
// Faster 2.5 ULP division that does not support denormals.
SDValue SITargetLowering::lowerFDIV_FAST(SDValue Op, SelectionDAG &DAG) const {
SDLoc SL(Op);
@@ -3201,6 +3218,9 @@ SDValue SITargetLowering::LowerFDIV(SDVa
if (VT == MVT::f64)
return LowerFDIV64(Op, DAG);
+ if (VT == MVT::f16)
+ return LowerFDIV16(Op, DAG);
+
llvm_unreachable("Unexpected type for fdiv");
}
Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h?rev=290301&r1=290300&r2=290301&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h Wed Dec 21 21:05:41 2016
@@ -37,6 +37,7 @@ class SITargetLowering final : public AM
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFastUnsafeFDIV(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFDIV_FAST(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFDIV16(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFDIV32(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFDIV64(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFDIV(SDValue Op, SelectionDAG &DAG) const;
Modified: llvm/trunk/test/CodeGen/AMDGPU/fdiv.f16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fdiv.f16.ll?rev=290301&r1=290300&r2=290301&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fdiv.f16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fdiv.f16.ll Wed Dec 21 21:05:41 2016
@@ -4,27 +4,39 @@
; Make sure fdiv is promoted to f32.
; GCN-LABEL: {{^}}fdiv_f16
-; GCN: v_cvt_f32_f16
-; GCN: v_cvt_f32_f16
-; GCN: v_div_scale_f32
-; GCN-DAG: v_div_scale_f32
-; GCN-DAG: v_rcp_f32
-; GCN: v_fma_f32
-; GCN: v_fma_f32
-; GCN: v_mul_f32
-; GCN: v_fma_f32
-; GCN: v_fma_f32
-; GCN: v_fma_f32
-; GCN: v_div_fmas_f32
-; GCN: v_div_fixup_f32
-; GCN: v_cvt_f16_f32
+; SI: v_cvt_f32_f16
+; SI: v_cvt_f32_f16
+; SI: v_div_scale_f32
+; SI-DAG: v_div_scale_f32
+; SI-DAG: v_rcp_f32
+; SI: v_fma_f32
+; SI: v_fma_f32
+; SI: v_mul_f32
+; SI: v_fma_f32
+; SI: v_fma_f32
+; SI: v_fma_f32
+; SI: v_div_fmas_f32
+; SI: v_div_fixup_f32
+; SI: v_cvt_f16_f32
+
+; VI: buffer_load_ushort [[LHS:v[0-9]+]]
+; VI: buffer_load_ushort [[RHS:v[0-9]+]]
+
+; VI-DAG: v_cvt_f32_f16_e32 [[CVT_LHS:v[0-9]+]], [[LHS]]
+; VI-DAG: v_cvt_f32_f16_e32 [[CVT_RHS:v[0-9]+]], [[RHS]]
+
+; VI-DAG: v_rcp_f32_e32 [[RCP_RHS:v[0-9]+]], [[CVT_RHS]]
+; VI: v_mul_f32_e32 [[MUL:v[0-9]+]], [[RCP_RHS]], [[CVT_LHS]]
+; VI: v_cvt_f16_f32_e32 [[CVT_BACK:v[0-9]+]], [[MUL]]
+; VI: v_div_fixup_f16 [[RESULT:v[0-9]+]], [[CVT_BACK]], [[RHS]], [[LHS]]
+; VI: buffer_store_short [[RESULT]]
define void @fdiv_f16(
half addrspace(1)* %r,
half addrspace(1)* %a,
half addrspace(1)* %b) {
entry:
- %a.val = load half, half addrspace(1)* %a
- %b.val = load half, half addrspace(1)* %b
+ %a.val = load volatile half, half addrspace(1)* %a
+ %b.val = load volatile half, half addrspace(1)* %b
%r.val = fdiv half %a.val, %b.val
store half %r.val, half addrspace(1)* %r
ret void
More information about the llvm-commits mailing list