[PATCH] D32085: AMDGPU: Do not lower fast unsafe div for safe, f32, with fp32 denormals

Fri Apr 14 14:11:53 PDT 2017

kzhuravl updated this revision to Diff 95343.
kzhuravl added a comment.

Upload full diff.


https://reviews.llvm.org/D32085

Files:
  lib/Target/AMDGPU/SIISelLowering.cpp
  test/CodeGen/AMDGPU/fdiv.ll


Index: test/CodeGen/AMDGPU/fdiv.ll
===================================================================

--- test/CodeGen/AMDGPU/fdiv.ll
+++ test/CodeGen/AMDGPU/fdiv.ll
@@ -85,10 +85,20 @@
 }
 
 ; FUNC-LABEL: {{^}}fdiv_fast_denormals_f32:
-; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], s{{[0-9]+}}
-; GCN: v_mul_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}, [[RCP]]
-; GCN-NOT: [[RESULT]]
-; GCN: buffer_store_dword [[RESULT]]
+; GCN: v_div_scale_f32 [[NUM_SCALE:v[0-9]+]]
+; GCN-DAG: v_div_scale_f32 [[DEN_SCALE:v[0-9]+]]
+; GCN-DAG: v_rcp_f32_e32 [[NUM_RCP:v[0-9]+]], [[NUM_SCALE]]
+
+; GCN-NOT: s_setreg
+; GCN: v_fma_f32 [[A:v[0-9]+]], -[[NUM_SCALE]], [[NUM_RCP]], 1.0
+; GCN: v_fma_f32 [[B:v[0-9]+]], [[A]], [[NUM_RCP]], [[NUM_RCP]]
+; GCN: v_mul_f32_e32 [[C:v[0-9]+]], [[B]], [[DEN_SCALE]]
+; GCN: v_fma_f32 [[D:v[0-9]+]], -[[NUM_SCALE]], [[C]], [[DEN_SCALE]]
+; GCN: v_fma_f32 [[E:v[0-9]+]], [[D]], [[B]], [[C]]
+; GCN: v_fma_f32 [[F:v[0-9]+]], -[[NUM_SCALE]], [[E]], [[DEN_SCALE]]
+; GCN-NOT: s_setreg
+; GCN: v_div_fmas_f32 [[FMAS:v[0-9]+]], [[F]], [[B]], [[E]]
+; GCN: v_div_fixup_f32 v{{[0-9]+}}, [[FMAS]],
 define amdgpu_kernel void @fdiv_fast_denormals_f32(float addrspace(1)* %out, float %a, float %b) #2 {
 entry:
   %fdiv = fdiv fast float %a, %b
Index: lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.cpp
+++ lib/Target/AMDGPU/SIISelLowering.cpp
@@ -3381,9 +3381,11 @@
   EVT VT = Op.getValueType();
   bool Unsafe = DAG.getTarget().Options.UnsafeFPMath;
 
+  if (!Unsafe && VT == MVT::f32 && Subtarget->hasFP32Denormals())
+    return SDValue();
+
   if (const ConstantFPSDNode *CLHS = dyn_cast<ConstantFPSDNode>(LHS)) {
-    if (Unsafe || (VT == MVT::f32 && !Subtarget->hasFP32Denormals()) ||
-        VT == MVT::f16) {
+    if (Unsafe || VT == MVT::f32 || VT == MVT::f16) {
       if (CLHS->isExactlyValue(1.0)) {
         // v_rcp_f32 and v_rsq_f32 do not support denormals, and according to
         // the CI documentation has a worst case error of 1 ulp.


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D32085.95343.patch
Type: text/x-patch
Size: 2058 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20170414/afdf6684/attachment.bin>