[PATCH] D47954: Utilize new SDNode flag functionality to expand current support for fdiv
Michael Berg via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 15 13:49:30 PDT 2018
This revision was automatically updated to reflect the committed changes.
Closed by commit rL334862: Utilize new SDNode flag functionality to expand current support for fdiv (authored by mcberg2017).
Herald added a subscriber: llvm-commits.
Changed prior to commit:
https://reviews.llvm.org/D47954?vs=151429&id=151556#toc
Repository:
rL LLVM
https://reviews.llvm.org/D47954
Files:
llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/trunk/test/CodeGen/AMDGPU/fdiv.f16.ll
llvm/trunk/test/CodeGen/X86/fmf-flags.ll
Index: llvm/trunk/test/CodeGen/X86/fmf-flags.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/fmf-flags.ll
+++ llvm/trunk/test/CodeGen/X86/fmf-flags.ll
@@ -8,17 +8,11 @@
; X64-LABEL: fast_recip_sqrt:
; X64: # %bb.0:
; X64-NEXT: rsqrtss %xmm0, %xmm1
-; X64-NEXT: xorps %xmm2, %xmm2
-; X64-NEXT: cmpeqss %xmm0, %xmm2
; X64-NEXT: mulss %xmm1, %xmm0
-; X64-NEXT: movss {{.*}}(%rip), %xmm3
-; X64-NEXT: mulss %xmm0, %xmm3
; X64-NEXT: mulss %xmm1, %xmm0
; X64-NEXT: addss {{.*}}(%rip), %xmm0
-; X64-NEXT: mulss %xmm3, %xmm0
-; X64-NEXT: andnps %xmm0, %xmm2
-; X64-NEXT: movss {{.*}}(%rip), %xmm0
-; X64-NEXT: divss %xmm2, %xmm0
+; X64-NEXT: mulss {{.*}}(%rip), %xmm1
+; X64-NEXT: mulss %xmm1, %xmm0
; X64-NEXT: retq
;
; X86-LABEL: fast_recip_sqrt:
@@ -89,10 +83,14 @@
define float @not_so_fast_recip_sqrt(float %x) {
; X64-LABEL: not_so_fast_recip_sqrt:
; X64: # %bb.0:
-; X64-NEXT: sqrtss %xmm0, %xmm1
-; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64-NEXT: divss %xmm1, %xmm0
-; X64-NEXT: movss %xmm1, {{.*}}(%rip)
+; X64-NEXT: rsqrtss %xmm0, %xmm1
+; X64-NEXT: sqrtss %xmm0, %xmm2
+; X64-NEXT: mulss %xmm1, %xmm0
+; X64-NEXT: mulss %xmm1, %xmm0
+; X64-NEXT: addss {{.*}}(%rip), %xmm0
+; X64-NEXT: mulss {{.*}}(%rip), %xmm1
+; X64-NEXT: mulss %xmm1, %xmm0
+; X64-NEXT: movss %xmm2, sqrt1(%rip)
; X64-NEXT: retq
;
; X86-LABEL: not_so_fast_recip_sqrt:
@@ -111,3 +109,19 @@
ret float %z
}
+define float @div_arcp_by_const(half %x) {
+; X64-LABEL: .LCPI4_0:
+; X64-NEXT: .long 1036828672
+; X64-LABEL: div_arcp_by_const:
+; X64: movzwl %ax, %edi
+; X64: mulss .LCPI4_0(%rip), %xmm0
+;
+; X86-LABEL: .LCPI4_0:
+; X86-NEXT: .long 1036828672
+; X86-LABEL: div_arcp_by_const:
+; X86: movzwl %ax, %eax
+; X86: fmuls .LCPI4_0
+ %rcp = fdiv arcp half %x, 10.0
+ %z = fpext half %rcp to float
+ ret float %z
+}
Index: llvm/trunk/test/CodeGen/AMDGPU/fdiv.f16.ll
===================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fdiv.f16.ll
+++ llvm/trunk/test/CodeGen/AMDGPU/fdiv.f16.ll
@@ -218,7 +218,7 @@
}
; FUNC-LABEL: {{^}}div_arcp_k_x_pat_f16:
-; SI: v_mul_f32_e32 v{{[0-9]+}}, 0x3dcccccd, v{{[0-9]+}}
+; SI: v_mul_f32_e32 v{{[0-9]+}}, 0x3dccc000, v{{[0-9]+}}
; GFX8_9: v_mul_f16_e32 [[MUL:v[0-9]+]], 0x2e66, v{{[0-9]+}}
; GFX8_9: buffer_store_short [[MUL]]
@@ -230,7 +230,7 @@
}
; FUNC-LABEL: {{^}}div_arcp_neg_k_x_pat_f16:
-; SI: v_mul_f32_e32 v{{[0-9]+}}, 0xbdcccccd, v{{[0-9]+}}
+; SI: v_mul_f32_e32 v{{[0-9]+}}, 0xbdccc000, v{{[0-9]+}}
; GFX8_9: v_mul_f16_e32 [[MUL:v[0-9]+]], 0xae66, v{{[0-9]+}}
; GFX8_9: buffer_store_short [[MUL]]
Index: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10865,7 +10865,7 @@
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
- if (Options.UnsafeFPMath) {
+ if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
// fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
if (N1CFP) {
// Compute the reciprocal 1.0 / c2.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D47954.151556.patch
Type: text/x-patch
Size: 3395 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20180615/6ec343c6/attachment.bin>
More information about the llvm-commits mailing list