[llvm] r334113 - guard fsqrt with fmf sub flags
Michael Berg via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 6 11:47:55 PDT 2018
Author: mcberg2017
Date: Wed Jun 6 11:47:55 2018
New Revision: 334113
URL: http://llvm.org/viewvc/llvm-project?rev=334113&view=rev
Log:
guard fsqrt with fmf sub flags
Summary:
This change guards optimizations with FMF sub-flags in addition to the global unsafe-fp-math option. These changes originated from D46483.
It contains only context for fsqrt.
Reviewers: spatel, hfinkel, arsenm
Reviewed By: spatel
Subscribers: hfinkel, wdng, andrew.w.kaylor, wristow, efriedma, nemanjai
Differential Revision: https://reviews.llvm.org/D47749
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/trunk/test/CodeGen/PowerPC/fmf-propagation.ll
llvm/trunk/test/CodeGen/X86/fmf-flags.ll
llvm/trunk/test/CodeGen/X86/sqrt-fastmath-mir.ll
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=334113&r1=334112&r2=334113&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Wed Jun 6 11:47:55 2018
@@ -10893,17 +10893,16 @@ SDValue DAGCombiner::visitFREM(SDNode *N
}
SDValue DAGCombiner::visitFSQRT(SDNode *N) {
- if (!DAG.getTarget().Options.UnsafeFPMath)
+ SDNodeFlags Flags = N->getFlags();
+ if (!DAG.getTarget().Options.UnsafeFPMath &&
+ !Flags.hasApproximateFuncs())
return SDValue();
SDValue N0 = N->getOperand(0);
if (TLI.isFsqrtCheap(N0, DAG))
return SDValue();
- // TODO: FSQRT nodes should have flags that propagate to the created nodes.
- // For now, create a Flags object for use with reassociation math transforms.
- SDNodeFlags Flags;
- Flags.setAllowReassociation(true);
+ // FSQRT nodes have flags that propagate to the created nodes.
return buildSqrtEstimate(N0, Flags);
}
Modified: llvm/trunk/test/CodeGen/PowerPC/fmf-propagation.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/fmf-propagation.ll?rev=334113&r1=334112&r2=334113&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/fmf-propagation.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/fmf-propagation.ll Wed Jun 6 11:47:55 2018
@@ -300,18 +300,34 @@ define float @fmul_fma_fast2(float %x) {
; Reduced precision for sqrt is allowed - should use estimate and NR iterations.
; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_afn:'
-; FMFDEBUG: fsqrt afn {{t[0-9]+}}
+; FMFDEBUG: fmul afn {{t[0-9]+}}
; FMFDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_afn:'
; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_afn:'
-; GLOBALDEBUG: fmul reassoc {{t[0-9]+}}
+; GLOBALDEBUG: fmul afn {{t[0-9]+}}
; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_afn:'
define float @sqrt_afn(float %x) {
; FMF-LABEL: sqrt_afn:
-; FMF: # %bb.0:
-; FMF-NEXT: xssqrtsp 1, 1
-; FMF-NEXT: blr
+; FMF: # %bb.0:
+; FMF-NEXT: xxlxor 0, 0, 0
+; FMF-NEXT: fcmpu 0, 1, 0
+; FMF-NEXT: beq 0, .LBB10_2
+; FMF-NEXT: # %bb.1:
+; FMF-NEXT: addis 3, 2, .LCPI10_0@toc@ha
+; FMF-NEXT: xsrsqrtesp 3, 1
+; FMF-NEXT: addi 3, 3, .LCPI10_0@toc@l
+; FMF-NEXT: lfsx 0, 0, 3
+; FMF-NEXT: xsmulsp 2, 1, 0
+; FMF-NEXT: xsmulsp 4, 3, 3
+; FMF-NEXT: xssubsp 2, 2, 1
+; FMF-NEXT: xsmulsp 2, 2, 4
+; FMF-NEXT: xssubsp 0, 0, 2
+; FMF-NEXT: xsmulsp 0, 3, 0
+; FMF-NEXT: xsmulsp 0, 0, 1
+; FMF-NEXT: .LBB10_2:
+; FMF-NEXT: fmr 1, 0
+; FMF-NEXT: blr
;
; GLOBAL-LABEL: sqrt_afn:
; GLOBAL: # %bb.0:
@@ -340,18 +356,34 @@ define float @sqrt_afn(float %x) {
; The call is now fully 'fast'. This implies that approximation is allowed.
; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_fast:'
-; FMFDEBUG: fsqrt nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
+; FMFDEBUG: fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
; FMFDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_fast:'
; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_fast:'
-; GLOBALDEBUG: fmul reassoc {{t[0-9]+}}
+; GLOBALDEBUG: fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_fast:'
define float @sqrt_fast(float %x) {
; FMF-LABEL: sqrt_fast:
-; FMF: # %bb.0:
-; FMF-NEXT: xssqrtsp 1, 1
-; FMF-NEXT: blr
+; FMF: # %bb.0:
+; FMF-NEXT: xxlxor 0, 0, 0
+; FMF-NEXT: fcmpu 0, 1, 0
+; FMF-NEXT: beq 0, .LBB11_2
+; FMF-NEXT: # %bb.1:
+; FMF-NEXT: xsrsqrtesp 2, 1
+; FMF-NEXT: addis 3, 2, .LCPI11_0@toc@ha
+; FMF-NEXT: fneg 0, 1
+; FMF-NEXT: fmr 4, 1
+; FMF-NEXT: addi 3, 3, .LCPI11_0@toc@l
+; FMF-NEXT: lfsx 3, 0, 3
+; FMF-NEXT: xsmaddasp 4, 0, 3
+; FMF-NEXT: xsmulsp 0, 2, 2
+; FMF-NEXT: xsmaddasp 3, 4, 0
+; FMF-NEXT: xsmulsp 0, 2, 3
+; FMF-NEXT: xsmulsp 0, 0, 1
+; FMF-NEXT: .LBB11_2:
+; FMF-NEXT: fmr 1, 0
+; FMF-NEXT: blr
;
; GLOBAL-LABEL: sqrt_fast:
; GLOBAL: # %bb.0:
Modified: llvm/trunk/test/CodeGen/X86/fmf-flags.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fmf-flags.ll?rev=334113&r1=334112&r2=334113&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fmf-flags.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fmf-flags.ll Wed Jun 6 11:47:55 2018
@@ -7,9 +7,18 @@ declare float @llvm.sqrt.f32(float %x);
define float @fast_recip_sqrt(float %x) {
; X64-LABEL: fast_recip_sqrt:
; X64: # %bb.0:
-; X64-NEXT: sqrtss %xmm0, %xmm1
-; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64-NEXT: divss %xmm1, %xmm0
+; X64-NEXT: rsqrtss %xmm0, %xmm1
+; X64-NEXT: xorps %xmm2, %xmm2
+; X64-NEXT: cmpeqss %xmm0, %xmm2
+; X64-NEXT: mulss %xmm1, %xmm0
+; X64-NEXT: movss {{.*}}(%rip), %xmm3
+; X64-NEXT: mulss %xmm0, %xmm3
+; X64-NEXT: mulss %xmm1, %xmm0
+; X64-NEXT: addss {{.*}}(%rip), %xmm0
+; X64-NEXT: mulss %xmm3, %xmm0
+; X64-NEXT: andnps %xmm0, %xmm2
+; X64-NEXT: movss {{.*}}(%rip), %xmm0
+; X64-NEXT: divss %xmm2, %xmm0
; X64-NEXT: retq
;
; X86-LABEL: fast_recip_sqrt:
Modified: llvm/trunk/test/CodeGen/X86/sqrt-fastmath-mir.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sqrt-fastmath-mir.ll?rev=334113&r1=334112&r2=334113&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sqrt-fastmath-mir.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sqrt-fastmath-mir.ll Wed Jun 6 11:47:55 2018
@@ -7,16 +7,16 @@ define float @foo(float %f) #0 {
; CHECK: body:
; CHECK: %0:fr32 = COPY $xmm0
; CHECK: %1:fr32 = VRSQRTSSr killed %2, %0
-; CHECK: %3:fr32 = reassoc VMULSSrr %0, %1
+; CHECK: %3:fr32 = VMULSSrr %0, %1
; CHECK: %4:fr32 = VMOVSSrm
; CHECK: %5:fr32 = VFMADD213SSr %1, killed %3, %4
; CHECK: %6:fr32 = VMOVSSrm
-; CHECK: %7:fr32 = reassoc VMULSSrr %1, %6
-; CHECK: %8:fr32 = reassoc VMULSSrr killed %7, killed %5
-; CHECK: %9:fr32 = reassoc VMULSSrr %0, %8
+; CHECK: %7:fr32 = VMULSSrr %1, %6
+; CHECK: %8:fr32 = VMULSSrr killed %7, killed %5
+; CHECK: %9:fr32 = VMULSSrr %0, %8
; CHECK: %10:fr32 = VFMADD213SSr %8, %9, %4
-; CHECK: %11:fr32 = reassoc VMULSSrr %9, %6
-; CHECK: %12:fr32 = reassoc VMULSSrr killed %11, killed %10
+; CHECK: %11:fr32 = VMULSSrr %9, %6
+; CHECK: %12:fr32 = VMULSSrr killed %11, killed %10
; CHECK: %14:fr32 = FsFLD0SS
; CHECK: %15:fr32 = VCMPSSrr %0, killed %14, 0
; CHECK: %17:vr128 = VANDNPSrr killed %16, killed %13
More information about the llvm-commits
mailing list