[llvm] 95bcab8 - [DAGCombiner] Require ninf for sqrt recip estimation

Qiu Chaofan via llvm-commits llvm-commits@lists.llvm.org
Wed Apr 1 01:24:12 PDT 2020


Author: Qiu Chaofan
Date: 2020-04-01T16:23:43+08:00
New Revision: 95bcab8272ced7444bc25353784de96ad9375c02

URL: https://github.com/llvm/llvm-project/commit/95bcab8272ced7444bc25353784de96ad9375c02
DIFF: https://github.com/llvm/llvm-project/commit/95bcab8272ced7444bc25353784de96ad9375c02.diff

LOG: [DAGCombiner] Require ninf for sqrt recip estimation

Currently, the DAG combiner uses (fmul (rsqrt x) x) to estimate the
square root of x. However, this estimate returns NaN when x is +Inf,
because rsqrt(+Inf) is 0 and 0 * +Inf is NaN, while the correct result
sqrt(+Inf) is +Inf.
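
For illustration, a minimal standalone C++ sketch of the failure mode,
using 1.0f / std::sqrt(x) as a stand-in for a hardware rsqrt estimate:

    #include <cmath>
    #include <cstdio>
    #include <limits>

    int main() {
      float x = std::numeric_limits<float>::infinity();
      float rsqrt = 1.0f / std::sqrt(x); // rsqrt(+Inf) == 0
      float est = x * rsqrt;             // +Inf * 0 == NaN under IEEE 754
      std::printf("sqrt(+Inf) = %f, estimate = %f\n", std::sqrt(x), est);
      return 0;
    }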

Reviewed By: spatel

Differential Revision: https://reviews.llvm.org/D76853

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/test/CodeGen/NVPTX/fast-math.ll
    llvm/test/CodeGen/NVPTX/sqrt-approx.ll
    llvm/test/CodeGen/PowerPC/fmf-propagation.ll
    llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll
    llvm/test/CodeGen/X86/sqrt-fastmath.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index df4ec4cac1a3..a74060249d4b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -13109,8 +13109,12 @@ SDValue DAGCombiner::visitFREM(SDNode *N) {
 
 SDValue DAGCombiner::visitFSQRT(SDNode *N) {
   SDNodeFlags Flags = N->getFlags();
-  if (!DAG.getTarget().Options.UnsafeFPMath &&
-      !Flags.hasApproximateFuncs())
+  const TargetOptions &Options = DAG.getTarget().Options;
+
+  // Require 'ninf' flag since sqrt(+Inf) = +Inf, but the estimation goes as:
+  // sqrt(+Inf) == rsqrt(+Inf) * +Inf = 0 * +Inf = NaN
+  if ((!Options.UnsafeFPMath && !Flags.hasApproximateFuncs()) ||
+      (!Options.NoInfsFPMath && !Flags.hasNoInfs()))
     return SDValue();
 
   SDValue N0 = N->getOperand(0);
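
The guard above requires both legs independently: the approximation must
be allowed (TargetOptions::UnsafeFPMath or the per-node 'afn' flag), and
infinities must be excluded (TargetOptions::NoInfsFPMath or the per-node
'ninf' flag). A condensed sketch of the predicate, with hypothetical
names rather than the LLVM API:

    // The transform fires only when both legs pass; this is the
    // De Morgan complement of the early-return condition above.
    bool canUseSqrtEstimate(bool UnsafeFPMath, bool HasApproxFuncs,
                            bool NoInfsFPMath, bool HasNoInfs) {
      bool ApproxAllowed = UnsafeFPMath || HasApproxFuncs; // 'afn' leg
      bool InfsExcluded = NoInfsFPMath || HasNoInfs;       // 'ninf' leg
      return ApproxAllowed && InfsExcluded;
    }

Accordingly, the updated tests below keep the rsqrt-based expansion only
for sqrt calls carrying 'ninf' and select a real sqrt instruction
otherwise.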

diff --git a/llvm/test/CodeGen/NVPTX/fast-math.ll b/llvm/test/CodeGen/NVPTX/fast-math.ll
index 900521664e0c..db5fb63f4e76 100644
--- a/llvm/test/CodeGen/NVPTX/fast-math.ll
+++ b/llvm/test/CodeGen/NVPTX/fast-math.ll
@@ -13,7 +13,7 @@ define float @sqrt_div(float %a, float %b) {
 }
 
 ; CHECK-LABEL: sqrt_div_fast(
-; CHECK: sqrt.approx.f32
+; CHECK: sqrt.rn.f32
 ; CHECK: div.approx.f32
 define float @sqrt_div_fast(float %a, float %b) #0 {
   %t1 = tail call float @llvm.sqrt.f32(float %a)
@@ -21,6 +21,15 @@ define float @sqrt_div_fast(float %a, float %b) #0 {
   ret float %t2
 }
 
+; CHECK-LABEL: sqrt_div_fast_ninf(
+; CHECK: sqrt.approx.f32
+; CHECK: div.approx.f32
+define float @sqrt_div_fast_ninf(float %a, float %b) #0 {
+  %t1 = tail call ninf float @llvm.sqrt.f32(float %a)
+  %t2 = fdiv float %t1, %b
+  ret float %t2
+}
+
 ; CHECK-LABEL: sqrt_div_ftz(
 ; CHECK: sqrt.rn.ftz.f32
 ; CHECK: div.rn.ftz.f32
@@ -31,7 +40,7 @@ define float @sqrt_div_ftz(float %a, float %b) #1 {
 }
 
 ; CHECK-LABEL: sqrt_div_fast_ftz(
-; CHECK: sqrt.approx.ftz.f32
+; CHECK: sqrt.rn.ftz.f32
 ; CHECK: div.approx.ftz.f32
 define float @sqrt_div_fast_ftz(float %a, float %b) #0 #1 {
   %t1 = tail call float @llvm.sqrt.f32(float %a)
@@ -39,12 +48,20 @@ define float @sqrt_div_fast_ftz(float %a, float %b) #0 #1 {
   ret float %t2
 }
 
+; CHECK-LABEL: sqrt_div_fast_ftz_ninf(
+; CHECK: sqrt.approx.ftz.f32
+; CHECK: div.approx.ftz.f32
+define float @sqrt_div_fast_ftz_ninf(float %a, float %b) #0 #1 {
+  %t1 = tail call ninf float @llvm.sqrt.f32(float %a)
+  %t2 = fdiv float %t1, %b
+  ret float %t2
+}
+
 ; There are no fast-math or ftz versions of sqrt and div for f64.  We use
 ; reciprocal(rsqrt(x)) for sqrt(x), and emit a vanilla divide.
 
 ; CHECK-LABEL: sqrt_div_fast_ftz_f64(
-; CHECK: rsqrt.approx.f64
-; CHECK: rcp.approx.ftz.f64
+; CHECK: sqrt.rn.f64
 ; CHECK: div.rn.f64
 define double @sqrt_div_fast_ftz_f64(double %a, double %b) #0 #1 {
   %t1 = tail call double @llvm.sqrt.f64(double %a)
@@ -52,6 +69,16 @@ define double @sqrt_div_fast_ftz_f64(double %a, double %b) #0 #1 {
   ret double %t2
 }
 
+; CHECK-LABEL: sqrt_div_fast_ftz_f64_ninf(
+; CHECK: rsqrt.approx.f64
+; CHECK: rcp.approx.ftz.f64
+; CHECK: div.rn.f64
+define double @sqrt_div_fast_ftz_f64_ninf(double %a, double %b) #0 #1 {
+  %t1 = tail call ninf double @llvm.sqrt.f64(double %a)
+  %t2 = fdiv double %t1, %b
+  ret double %t2
+}
+
 ; CHECK-LABEL: rsqrt(
 ; CHECK-NOT: rsqrt.approx
 ; CHECK: sqrt.rn.f32
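
As the comment in this test notes, f64 has no approximate sqrt on NVPTX,
so under 'ninf' the square root is formed as reciprocal(rsqrt(x)). In
exact arithmetic this is just the identity 1 / x^(-1/2) = x^(1/2); a
tiny numeric check (the hardware approximations add error on top):

    #include <cassert>
    #include <cmath>

    int main() {
      double x = 42.0;
      double via_rcp_rsqrt = 1.0 / (1.0 / std::sqrt(x)); // rcp(rsqrt(x))
      assert(std::fabs(via_rcp_rsqrt - std::sqrt(x)) < 1e-12);
      return 0;
    }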

diff --git a/llvm/test/CodeGen/NVPTX/sqrt-approx.ll b/llvm/test/CodeGen/NVPTX/sqrt-approx.ll
index a8590b7c43ab..465b696c7610 100644
--- a/llvm/test/CodeGen/NVPTX/sqrt-approx.ll
+++ b/llvm/test/CodeGen/NVPTX/sqrt-approx.ll
@@ -45,35 +45,63 @@ define double @test_rsqrt64_ftz(double %a) #0 #1 {
 
 ; CHECK-LABEL test_sqrt32
 define float @test_sqrt32(float %a) #0 {
-; CHECK: sqrt.approx.f32
+; CHECK: sqrt.rn.f32
   %ret = tail call float @llvm.sqrt.f32(float %a)
   ret float %ret
 }
 
+; CHECK-LABEL test_sqrt32_ninf
+define float @test_sqrt32_ninf(float %a) #0 {
+; CHECK: sqrt.approx.f32
+  %ret = tail call ninf float @llvm.sqrt.f32(float %a)
+  ret float %ret
+}
+
 ; CHECK-LABEL test_sqrt_ftz
 define float @test_sqrt_ftz(float %a) #0 #1 {
-; CHECK: sqrt.approx.ftz.f32
+; CHECK: sqrt.rn.ftz.f32
   %ret = tail call float @llvm.sqrt.f32(float %a)
   ret float %ret
 }
 
+; CHECK-LABEL test_sqrt_ftz_ninf
+define float @test_sqrt_ftz_ninf(float %a) #0 #1 {
+; CHECK: sqrt.approx.ftz.f32
+  %ret = tail call ninf float @llvm.sqrt.f32(float %a)
+  ret float %ret
+}
+
 ; CHECK-LABEL test_sqrt64
 define double @test_sqrt64(double %a) #0 {
+; CHECK: sqrt.rn.f64
+  %ret = tail call double @llvm.sqrt.f64(double %a)
+  ret double %ret
+}
+
+; CHECK-LABEL test_sqrt64_ninf
+define double @test_sqrt64_ninf(double %a) #0 {
 ; There's no sqrt.approx.f64 instruction; we emit
 ; reciprocal(rsqrt.approx.f64(x)).  There's no non-ftz approximate reciprocal,
 ; so we just use the ftz version.
 ; CHECK: rsqrt.approx.f64
 ; CHECK: rcp.approx.ftz.f64
-  %ret = tail call double @llvm.sqrt.f64(double %a)
+  %ret = tail call ninf double @llvm.sqrt.f64(double %a)
   ret double %ret
 }
 
 ; CHECK-LABEL test_sqrt64_ftz
 define double @test_sqrt64_ftz(double %a) #0 #1 {
+; CHECK: sqrt.rn.f64
+  %ret = tail call double @llvm.sqrt.f64(double %a)
+  ret double %ret
+}
+
+; CHECK-LABEL test_sqrt64_ftz_ninf
+define double @test_sqrt64_ftz_ninf(double %a) #0 #1 {
 ; There's no sqrt.approx.ftz.f64 instruction; we just use the non-ftz version.
 ; CHECK: rsqrt.approx.f64
 ; CHECK: rcp.approx.ftz.f64
-  %ret = tail call double @llvm.sqrt.f64(double %a)
+  %ret = tail call ninf double @llvm.sqrt.f64(double %a)
   ret double %ret
 }
 
@@ -92,11 +120,18 @@ define float @test_rsqrt32_refined(float %a) #0 #2 {
 
 ; CHECK-LABEL: test_sqrt32_refined
 define float @test_sqrt32_refined(float %a) #0 #2 {
-; CHECK: rsqrt.approx.f32
+; CHECK: sqrt.rn.f32
   %ret = tail call float @llvm.sqrt.f32(float %a)
   ret float %ret
 }
 
+; CHECK-LABEL: test_sqrt32_refined_ninf
+define float @test_sqrt32_refined_ninf(float %a) #0 #2 {
+; CHECK: rsqrt.approx.f32
+  %ret = tail call ninf float @llvm.sqrt.f32(float %a)
+  ret float %ret
+}
+
 ; CHECK-LABEL: test_rsqrt64_refined
 define double @test_rsqrt64_refined(double %a) #0 #2 {
 ; CHECK: rsqrt.approx.f64
@@ -107,11 +142,18 @@ define double @test_rsqrt64_refined(double %a) #0 #2 {
 
 ; CHECK-LABEL: test_sqrt64_refined
 define double @test_sqrt64_refined(double %a) #0 #2 {
-; CHECK: rsqrt.approx.f64
+; CHECK: sqrt.rn.f64
   %ret = tail call double @llvm.sqrt.f64(double %a)
   ret double %ret
 }
 
+; CHECK-LABEL: test_sqrt64_refined_ninf
+define double @test_sqrt64_refined_ninf(double %a) #0 #2 {
+; CHECK: rsqrt.approx.f64
+  %ret = tail call ninf double @llvm.sqrt.f64(double %a)
+  ret double %ret
+}
+
 ; -- refined sqrt and rsqrt with ftz enabled --
 
 ; CHECK-LABEL: test_rsqrt32_refined_ftz
@@ -124,11 +166,18 @@ define float @test_rsqrt32_refined_ftz(float %a) #0 #1 #2 {
 
 ; CHECK-LABEL: test_sqrt32_refined_ftz
 define float @test_sqrt32_refined_ftz(float %a) #0 #1 #2 {
-; CHECK: rsqrt.approx.ftz.f32
+; CHECK: sqrt.rn.ftz.f32
   %ret = tail call float @llvm.sqrt.f32(float %a)
   ret float %ret
 }
 
+; CHECK-LABEL: test_sqrt32_refined_ftz_ninf
+define float @test_sqrt32_refined_ftz_ninf(float %a) #0 #1 #2 {
+; CHECK: rsqrt.approx.ftz.f32
+  %ret = tail call ninf float @llvm.sqrt.f32(float %a)
+  ret float %ret
+}
+
 ; CHECK-LABEL: test_rsqrt64_refined_ftz
 define double @test_rsqrt64_refined_ftz(double %a) #0 #1 #2 {
 ; There's no rsqrt.approx.ftz.f64, so we just use the non-ftz version.
@@ -140,11 +189,18 @@ define double @test_rsqrt64_refined_ftz(double %a) #0 #1 #2 {
 
 ; CHECK-LABEL: test_sqrt64_refined_ftz
 define double @test_sqrt64_refined_ftz(double %a) #0 #1 #2 {
-; CHECK: rsqrt.approx.f64
+; CHECK: sqrt.rn.f64
   %ret = tail call double @llvm.sqrt.f64(double %a)
   ret double %ret
 }
 
+; CHECK-LABEL: test_sqrt64_refined_ftz_ninf
+define double @test_sqrt64_refined_ftz_ninf(double %a) #0 #1 #2 {
+; CHECK: rsqrt.approx.f64
+  %ret = tail call ninf double @llvm.sqrt.f64(double %a)
+  ret double %ret
+}
+
 attributes #0 = { "unsafe-fp-math" = "true" }
 attributes #1 = { "denormal-fp-math-f32" = "preserve-sign,preserve-sign" }
 attributes #2 = { "reciprocal-estimates" = "rsqrtf:1,rsqrtd:1,sqrtf:1,sqrtd:1" }

diff --git a/llvm/test/CodeGen/PowerPC/fmf-propagation.ll b/llvm/test/CodeGen/PowerPC/fmf-propagation.ll
index 222583638d59..59a7d233c0c3 100644
--- a/llvm/test/CodeGen/PowerPC/fmf-propagation.ll
+++ b/llvm/test/CodeGen/PowerPC/fmf-propagation.ll
@@ -270,11 +270,11 @@ define float @fmul_fma_fast2(float %x) {
 ; Reduced precision for sqrt is allowed - should use estimate and NR iterations.
 
 ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_afn_ieee:'
-; FMFDEBUG:         fmul afn {{t[0-9]+}}
+; FMFDEBUG:         fmul ninf afn {{t[0-9]+}}
 ; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'sqrt_afn_ieee:'
 
 ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_afn_ieee:'
-; GLOBALDEBUG:         fmul afn {{t[0-9]+}}
+; GLOBALDEBUG:         fmul ninf afn {{t[0-9]+}}
 ; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'sqrt_afn_ieee:'
 
 define float @sqrt_afn_ieee(float %x) #0 {
@@ -321,17 +321,31 @@ define float @sqrt_afn_ieee(float %x) #0 {
 ; GLOBAL-NEXT:    xsmulsp 0, 0, 2
 ; GLOBAL-NEXT:  .LBB10_2:
 ; GLOBAL-NEXT:    fmr 1, 0
+; GLOBAL-NEXT:    blr
+  %rt = call afn ninf float @llvm.sqrt.f32(float %x)
+  ret float %rt
+}
+
+define float @sqrt_afn_ieee_inf(float %x) #0 {
+; FMF-LABEL: sqrt_afn_ieee_inf:
+; FMF:       # %bb.0:
+; FMF-NEXT:    xssqrtsp 1, 1
+; FMF-NEXT:    blr
+;
+; GLOBAL-LABEL: sqrt_afn_ieee_inf:
+; GLOBAL:       # %bb.0:
+; GLOBAL-NEXT:    xssqrtsp 1, 1
 ; GLOBAL-NEXT:    blr
   %rt = call afn float @llvm.sqrt.f32(float %x)
   ret float %rt
 }
 
 ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_afn_preserve_sign:'
-; FMFDEBUG:         fmul afn {{t[0-9]+}}
+; FMFDEBUG:         fmul ninf afn {{t[0-9]+}}
 ; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'sqrt_afn_preserve_sign:'
 
 ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_afn_preserve_sign:'
-; GLOBALDEBUG:         fmul afn {{t[0-9]+}}
+; GLOBALDEBUG:         fmul ninf afn {{t[0-9]+}}
 ; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'sqrt_afn_preserve_sign:'
 
 define float @sqrt_afn_preserve_sign(float %x) #1 {
@@ -339,19 +353,19 @@ define float @sqrt_afn_preserve_sign(float %x) #1 {
 ; FMF:       # %bb.0:
 ; FMF-NEXT:    xxlxor 0, 0, 0
 ; FMF-NEXT:    fcmpu 0, 1, 0
-; FMF-NEXT:    beq 0, .LBB11_2
+; FMF-NEXT:    beq 0, .LBB12_2
 ; FMF-NEXT:  # %bb.1:
 ; FMF-NEXT:    xsrsqrtesp 0, 1
-; FMF-NEXT:    addis 3, 2, .LCPI11_0@toc@ha
-; FMF-NEXT:    addis 4, 2, .LCPI11_1@toc@ha
-; FMF-NEXT:    lfs 2, .LCPI11_0@toc@l(3)
-; FMF-NEXT:    lfs 3, .LCPI11_1@toc@l(4)
+; FMF-NEXT:    addis 3, 2, .LCPI12_0@toc@ha
+; FMF-NEXT:    addis 4, 2, .LCPI12_1@toc@ha
+; FMF-NEXT:    lfs 2, .LCPI12_0@toc@l(3)
+; FMF-NEXT:    lfs 3, .LCPI12_1@toc@l(4)
 ; FMF-NEXT:    xsmulsp 1, 1, 0
 ; FMF-NEXT:    xsmulsp 0, 1, 0
 ; FMF-NEXT:    xsmulsp 1, 1, 2
 ; FMF-NEXT:    xsaddsp 0, 0, 3
 ; FMF-NEXT:    xsmulsp 0, 1, 0
-; FMF-NEXT:  .LBB11_2:
+; FMF-NEXT:  .LBB12_2:
 ; FMF-NEXT:    fmr 1, 0
 ; FMF-NEXT:    blr
 ;
@@ -359,19 +373,33 @@ define float @sqrt_afn_preserve_sign(float %x) #1 {
 ; GLOBAL:       # %bb.0:
 ; GLOBAL-NEXT:    xxlxor 0, 0, 0
 ; GLOBAL-NEXT:    fcmpu 0, 1, 0
-; GLOBAL-NEXT:    beq 0, .LBB11_2
+; GLOBAL-NEXT:    beq 0, .LBB12_2
 ; GLOBAL-NEXT:  # %bb.1:
 ; GLOBAL-NEXT:    xsrsqrtesp 0, 1
-; GLOBAL-NEXT:    addis 3, 2, .LCPI11_0@toc@ha
-; GLOBAL-NEXT:    addis 4, 2, .LCPI11_1@toc@ha
-; GLOBAL-NEXT:    lfs 2, .LCPI11_0@toc@l(3)
-; GLOBAL-NEXT:    lfs 3, .LCPI11_1@toc@l(4)
+; GLOBAL-NEXT:    addis 3, 2, .LCPI12_0@toc@ha
+; GLOBAL-NEXT:    addis 4, 2, .LCPI12_1@toc@ha
+; GLOBAL-NEXT:    lfs 2, .LCPI12_0@toc@l(3)
+; GLOBAL-NEXT:    lfs 3, .LCPI12_1@toc@l(4)
 ; GLOBAL-NEXT:    xsmulsp 1, 1, 0
 ; GLOBAL-NEXT:    xsmaddasp 2, 1, 0
 ; GLOBAL-NEXT:    xsmulsp 0, 1, 3
 ; GLOBAL-NEXT:    xsmulsp 0, 0, 2
-; GLOBAL-NEXT:  .LBB11_2:
+; GLOBAL-NEXT:  .LBB12_2:
 ; GLOBAL-NEXT:    fmr 1, 0
+; GLOBAL-NEXT:    blr
+  %rt = call afn ninf float @llvm.sqrt.f32(float %x)
+  ret float %rt
+}
+
+define float @sqrt_afn_preserve_sign_inf(float %x) #1 {
+; FMF-LABEL: sqrt_afn_preserve_sign_inf:
+; FMF:       # %bb.0:
+; FMF-NEXT:    xssqrtsp 1, 1
+; FMF-NEXT:    blr
+;
+; GLOBAL-LABEL: sqrt_afn_preserve_sign_inf:
+; GLOBAL:       # %bb.0:
+; GLOBAL-NEXT:    xssqrtsp 1, 1
 ; GLOBAL-NEXT:    blr
   %rt = call afn float @llvm.sqrt.f32(float %x)
   ret float %rt
@@ -390,45 +418,45 @@ define float @sqrt_afn_preserve_sign(float %x) #1 {
 define float @sqrt_fast_ieee(float %x) #0 {
 ; FMF-LABEL: sqrt_fast_ieee:
 ; FMF:       # %bb.0:
-; FMF-NEXT:    addis 3, 2, .LCPI12_2@toc@ha
+; FMF-NEXT:    addis 3, 2, .LCPI14_2@toc@ha
 ; FMF-NEXT:    fabs 0, 1
-; FMF-NEXT:    lfs 2, .LCPI12_2@toc@l(3)
+; FMF-NEXT:    lfs 2, .LCPI14_2@toc@l(3)
 ; FMF-NEXT:    fcmpu 0, 0, 2
 ; FMF-NEXT:    xxlxor 0, 0, 0
-; FMF-NEXT:    blt 0, .LBB12_2
+; FMF-NEXT:    blt 0, .LBB14_2
 ; FMF-NEXT:  # %bb.1:
 ; FMF-NEXT:    xsrsqrtesp 0, 1
-; FMF-NEXT:    addis 3, 2, .LCPI12_0@toc@ha
-; FMF-NEXT:    addis 4, 2, .LCPI12_1@toc@ha
-; FMF-NEXT:    lfs 2, .LCPI12_0@toc@l(3)
-; FMF-NEXT:    lfs 3, .LCPI12_1@toc@l(4)
+; FMF-NEXT:    addis 3, 2, .LCPI14_0@toc@ha
+; FMF-NEXT:    addis 4, 2, .LCPI14_1@toc@ha
+; FMF-NEXT:    lfs 2, .LCPI14_0@toc@l(3)
+; FMF-NEXT:    lfs 3, .LCPI14_1@toc@l(4)
 ; FMF-NEXT:    xsmulsp 1, 1, 0
 ; FMF-NEXT:    xsmaddasp 2, 1, 0
 ; FMF-NEXT:    xsmulsp 0, 1, 3
 ; FMF-NEXT:    xsmulsp 0, 0, 2
-; FMF-NEXT:  .LBB12_2:
+; FMF-NEXT:  .LBB14_2:
 ; FMF-NEXT:    fmr 1, 0
 ; FMF-NEXT:    blr
 ;
 ; GLOBAL-LABEL: sqrt_fast_ieee:
 ; GLOBAL:       # %bb.0:
-; GLOBAL-NEXT:    addis 3, 2, .LCPI12_2@toc@ha
+; GLOBAL-NEXT:    addis 3, 2, .LCPI14_2@toc@ha
 ; GLOBAL-NEXT:    fabs 0, 1
-; GLOBAL-NEXT:    lfs 2, .LCPI12_2@toc@l(3)
+; GLOBAL-NEXT:    lfs 2, .LCPI14_2@toc@l(3)
 ; GLOBAL-NEXT:    fcmpu 0, 0, 2
 ; GLOBAL-NEXT:    xxlxor 0, 0, 0
-; GLOBAL-NEXT:    blt 0, .LBB12_2
+; GLOBAL-NEXT:    blt 0, .LBB14_2
 ; GLOBAL-NEXT:  # %bb.1:
 ; GLOBAL-NEXT:    xsrsqrtesp 0, 1
-; GLOBAL-NEXT:    addis 3, 2, .LCPI12_0@toc@ha
-; GLOBAL-NEXT:    addis 4, 2, .LCPI12_1@toc@ha
-; GLOBAL-NEXT:    lfs 2, .LCPI12_0@toc@l(3)
-; GLOBAL-NEXT:    lfs 3, .LCPI12_1@toc@l(4)
+; GLOBAL-NEXT:    addis 3, 2, .LCPI14_0@toc@ha
+; GLOBAL-NEXT:    addis 4, 2, .LCPI14_1@toc@ha
+; GLOBAL-NEXT:    lfs 2, .LCPI14_0@toc@l(3)
+; GLOBAL-NEXT:    lfs 3, .LCPI14_1@toc@l(4)
 ; GLOBAL-NEXT:    xsmulsp 1, 1, 0
 ; GLOBAL-NEXT:    xsmaddasp 2, 1, 0
 ; GLOBAL-NEXT:    xsmulsp 0, 1, 3
 ; GLOBAL-NEXT:    xsmulsp 0, 0, 2
-; GLOBAL-NEXT:  .LBB12_2:
+; GLOBAL-NEXT:  .LBB14_2:
 ; GLOBAL-NEXT:    fmr 1, 0
 ; GLOBAL-NEXT:    blr
   %rt = call fast float @llvm.sqrt.f32(float %x)
@@ -450,18 +478,18 @@ define float @sqrt_fast_preserve_sign(float %x) #1 {
 ; FMF:       # %bb.0:
 ; FMF-NEXT:    xxlxor 0, 0, 0
 ; FMF-NEXT:    fcmpu 0, 1, 0
-; FMF-NEXT:    beq 0, .LBB13_2
+; FMF-NEXT:    beq 0, .LBB15_2
 ; FMF-NEXT:  # %bb.1:
 ; FMF-NEXT:    xsrsqrtesp 0, 1
-; FMF-NEXT:    addis 3, 2, .LCPI13_0@toc@ha
-; FMF-NEXT:    addis 4, 2, .LCPI13_1@toc@ha
-; FMF-NEXT:    lfs 2, .LCPI13_0@toc@l(3)
-; FMF-NEXT:    lfs 3, .LCPI13_1@toc@l(4)
+; FMF-NEXT:    addis 3, 2, .LCPI15_0@toc@ha
+; FMF-NEXT:    addis 4, 2, .LCPI15_1@toc@ha
+; FMF-NEXT:    lfs 2, .LCPI15_0@toc@l(3)
+; FMF-NEXT:    lfs 3, .LCPI15_1@toc@l(4)
 ; FMF-NEXT:    xsmulsp 1, 1, 0
 ; FMF-NEXT:    xsmaddasp 2, 1, 0
 ; FMF-NEXT:    xsmulsp 0, 1, 3
 ; FMF-NEXT:    xsmulsp 0, 0, 2
-; FMF-NEXT:  .LBB13_2:
+; FMF-NEXT:  .LBB15_2:
 ; FMF-NEXT:    fmr 1, 0
 ; FMF-NEXT:    blr
 ;
@@ -469,18 +497,18 @@ define float @sqrt_fast_preserve_sign(float %x) #1 {
 ; GLOBAL:       # %bb.0:
 ; GLOBAL-NEXT:    xxlxor 0, 0, 0
 ; GLOBAL-NEXT:    fcmpu 0, 1, 0
-; GLOBAL-NEXT:    beq 0, .LBB13_2
+; GLOBAL-NEXT:    beq 0, .LBB15_2
 ; GLOBAL-NEXT:  # %bb.1:
 ; GLOBAL-NEXT:    xsrsqrtesp 0, 1
-; GLOBAL-NEXT:    addis 3, 2, .LCPI13_0@toc@ha
-; GLOBAL-NEXT:    addis 4, 2, .LCPI13_1@toc@ha
-; GLOBAL-NEXT:    lfs 2, .LCPI13_0@toc@l(3)
-; GLOBAL-NEXT:    lfs 3, .LCPI13_1@toc@l(4)
+; GLOBAL-NEXT:    addis 3, 2, .LCPI15_0@toc@ha
+; GLOBAL-NEXT:    addis 4, 2, .LCPI15_1@toc@ha
+; GLOBAL-NEXT:    lfs 2, .LCPI15_0@toc@l(3)
+; GLOBAL-NEXT:    lfs 3, .LCPI15_1@toc@l(4)
 ; GLOBAL-NEXT:    xsmulsp 1, 1, 0
 ; GLOBAL-NEXT:    xsmaddasp 2, 1, 0
 ; GLOBAL-NEXT:    xsmulsp 0, 1, 3
 ; GLOBAL-NEXT:    xsmulsp 0, 0, 2
-; GLOBAL-NEXT:  .LBB13_2:
+; GLOBAL-NEXT:  .LBB15_2:
 ; GLOBAL-NEXT:    fmr 1, 0
 ; GLOBAL-NEXT:    blr
   %rt = call fast float @llvm.sqrt.f32(float %x)
@@ -502,10 +530,10 @@ define double @fcmp_nnan(double %a, double %y, double %z) {
 ; FMF:       # %bb.0:
 ; FMF-NEXT:    xxlxor 0, 0, 0
 ; FMF-NEXT:    xscmpudp 0, 1, 0
-; FMF-NEXT:    blt 0, .LBB14_2
+; FMF-NEXT:    blt 0, .LBB16_2
 ; FMF-NEXT:  # %bb.1:
 ; FMF-NEXT:    fmr 3, 2
-; FMF-NEXT:  .LBB14_2:
+; FMF-NEXT:  .LBB16_2:
 ; FMF-NEXT:    fmr 1, 3
 ; FMF-NEXT:    blr
 ;
@@ -513,10 +541,10 @@ define double @fcmp_nnan(double %a, double %y, double %z) {
 ; GLOBAL:       # %bb.0:
 ; GLOBAL-NEXT:    xxlxor 0, 0, 0
 ; GLOBAL-NEXT:    xscmpudp 0, 1, 0
-; GLOBAL-NEXT:    blt 0, .LBB14_2
+; GLOBAL-NEXT:    blt 0, .LBB16_2
 ; GLOBAL-NEXT:  # %bb.1:
 ; GLOBAL-NEXT:    fmr 3, 2
-; GLOBAL-NEXT:  .LBB14_2:
+; GLOBAL-NEXT:  .LBB16_2:
 ; GLOBAL-NEXT:    fmr 1, 3
 ; GLOBAL-NEXT:    blr
   %cmp = fcmp nnan ult double %a, 0.0
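
The PowerPC expansions above follow the standard Newton-Raphson
refinement of a hardware rsqrt estimate: given an estimate e of
x^(-1/2), one step computes e' = e * (1.5 - 0.5 * x * e * e), and
sqrt(x) is then approximated as x * e'. A minimal sketch (the generated
code folds the constants differently, and the exact schedule is
target-specific):

    #include <cmath>
    #include <cstdio>

    float sqrt_via_rsqrt_nr(float x, float e /* rough rsqrt estimate */) {
      float e1 = e * (1.5f - 0.5f * x * e * e); // one NR step; roughly
                                                // doubles the correct bits
      return x * e1;                            // sqrt(x) ~= x * rsqrt(x)
    }

    int main() {
      float x = 2.0f;
      float e = 1.01f / std::sqrt(x); // simulate a ~1%-off estimate
      std::printf("%.7f vs %.7f\n", sqrt_via_rsqrt_nr(x, e), std::sqrt(x));
      return 0;
    }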

diff --git a/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll b/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll
index 7be19c07da80..4483de105385 100644
--- a/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll
+++ b/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll
@@ -9,17 +9,30 @@ define float @sqrt_ieee(float %f) #0 {
   ; CHECK:   liveins: $xmm0
   ; CHECK:   [[COPY:%[0-9]+]]:fr32 = COPY $xmm0
   ; CHECK:   [[DEF:%[0-9]+]]:fr32 = IMPLICIT_DEF
+  ; CHECK:   %1:fr32 = nofpexcept VSQRTSSr killed [[DEF]], [[COPY]], implicit $mxcsr
+  ; CHECK:   $xmm0 = COPY %1
+  ; CHECK:   RET 0, $xmm0
+  %call = tail call float @llvm.sqrt.f32(float %f)
+  ret float %call
+}
+
+define float @sqrt_ieee_ninf(float %f) #0 {
+  ; CHECK-LABEL: name: sqrt_ieee_ninf
+  ; CHECK: bb.0 (%ir-block.0):
+  ; CHECK:   liveins: $xmm0
+  ; CHECK:   [[COPY:%[0-9]+]]:fr32 = COPY $xmm0
+  ; CHECK:   [[DEF:%[0-9]+]]:fr32 = IMPLICIT_DEF
   ; CHECK:   [[VRSQRTSSr:%[0-9]+]]:fr32 = VRSQRTSSr killed [[DEF]], [[COPY]]
-  ; CHECK:   %3:fr32 = nofpexcept VMULSSrr [[COPY]], [[VRSQRTSSr]], implicit $mxcsr
+  ; CHECK:   %3:fr32 = ninf nofpexcept VMULSSrr [[COPY]], [[VRSQRTSSr]], implicit $mxcsr
   ; CHECK:   [[VMOVSSrm_alt:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.0, $noreg :: (load 4 from constant-pool)
-  ; CHECK:   %5:fr32 = nofpexcept VFMADD213SSr [[VRSQRTSSr]], killed %3, [[VMOVSSrm_alt]], implicit $mxcsr
+  ; CHECK:   %5:fr32 = ninf nofpexcept VFMADD213SSr [[VRSQRTSSr]], killed %3, [[VMOVSSrm_alt]], implicit $mxcsr
   ; CHECK:   [[VMOVSSrm_alt1:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.1, $noreg :: (load 4 from constant-pool)
-  ; CHECK:   %7:fr32 = nofpexcept VMULSSrr [[VRSQRTSSr]], [[VMOVSSrm_alt1]], implicit $mxcsr
-  ; CHECK:   %8:fr32 = nofpexcept VMULSSrr killed %7, killed %5, implicit $mxcsr
-  ; CHECK:   %9:fr32 = nofpexcept VMULSSrr [[COPY]], %8, implicit $mxcsr
-  ; CHECK:   %10:fr32 = nofpexcept VFMADD213SSr %8, %9, [[VMOVSSrm_alt]], implicit $mxcsr
-  ; CHECK:   %11:fr32 = nofpexcept VMULSSrr %9, [[VMOVSSrm_alt1]], implicit $mxcsr
-  ; CHECK:   %12:fr32 = nofpexcept VMULSSrr killed %11, killed %10, implicit $mxcsr
+  ; CHECK:   %7:fr32 = ninf nofpexcept VMULSSrr [[VRSQRTSSr]], [[VMOVSSrm_alt1]], implicit $mxcsr
+  ; CHECK:   %8:fr32 = ninf nofpexcept VMULSSrr killed %7, killed %5, implicit $mxcsr
+  ; CHECK:   %9:fr32 = ninf nofpexcept VMULSSrr [[COPY]], %8, implicit $mxcsr
+  ; CHECK:   %10:fr32 = ninf nofpexcept VFMADD213SSr %8, %9, [[VMOVSSrm_alt]], implicit $mxcsr
+  ; CHECK:   %11:fr32 = ninf nofpexcept VMULSSrr %9, [[VMOVSSrm_alt1]], implicit $mxcsr
+  ; CHECK:   %12:fr32 = ninf nofpexcept VMULSSrr killed %11, killed %10, implicit $mxcsr
   ; CHECK:   [[COPY1:%[0-9]+]]:vr128 = COPY %12
   ; CHECK:   [[COPY2:%[0-9]+]]:vr128 = COPY [[COPY]]
   ; CHECK:   [[VPBROADCASTDrm:%[0-9]+]]:vr128 = VPBROADCASTDrm $rip, 1, $noreg, %const.2, $noreg :: (load 4 from constant-pool)
@@ -31,7 +44,7 @@ define float @sqrt_ieee(float %f) #0 {
   ; CHECK:   [[COPY5:%[0-9]+]]:fr32 = COPY [[VPANDNrr]]
   ; CHECK:   $xmm0 = COPY [[COPY5]]
   ; CHECK:   RET 0, $xmm0
-  %call = tail call float @llvm.sqrt.f32(float %f)
+  %call = tail call ninf float @llvm.sqrt.f32(float %f)
   ret float %call
 }
 
@@ -41,17 +54,30 @@ define float @sqrt_daz(float %f) #1 {
   ; CHECK:   liveins: $xmm0
   ; CHECK:   [[COPY:%[0-9]+]]:fr32 = COPY $xmm0
   ; CHECK:   [[DEF:%[0-9]+]]:fr32 = IMPLICIT_DEF
+  ; CHECK:   %1:fr32 = nofpexcept VSQRTSSr killed [[DEF]], [[COPY]], implicit $mxcsr
+  ; CHECK:   $xmm0 = COPY %1
+  ; CHECK:   RET 0, $xmm0
+  %call = tail call float @llvm.sqrt.f32(float %f)
+  ret float %call
+}
+
+define float @sqrt_daz_ninf(float %f) #1 {
+  ; CHECK-LABEL: name: sqrt_daz_ninf
+  ; CHECK: bb.0 (%ir-block.0):
+  ; CHECK:   liveins: $xmm0
+  ; CHECK:   [[COPY:%[0-9]+]]:fr32 = COPY $xmm0
+  ; CHECK:   [[DEF:%[0-9]+]]:fr32 = IMPLICIT_DEF
   ; CHECK:   [[VRSQRTSSr:%[0-9]+]]:fr32 = VRSQRTSSr killed [[DEF]], [[COPY]]
-  ; CHECK:   %3:fr32 = nofpexcept VMULSSrr [[COPY]], [[VRSQRTSSr]], implicit $mxcsr
+  ; CHECK:   %3:fr32 = ninf nofpexcept VMULSSrr [[COPY]], [[VRSQRTSSr]], implicit $mxcsr
   ; CHECK:   [[VMOVSSrm_alt:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.0, $noreg :: (load 4 from constant-pool)
-  ; CHECK:   %5:fr32 = nofpexcept VFMADD213SSr [[VRSQRTSSr]], killed %3, [[VMOVSSrm_alt]], implicit $mxcsr
+  ; CHECK:   %5:fr32 = ninf nofpexcept VFMADD213SSr [[VRSQRTSSr]], killed %3, [[VMOVSSrm_alt]], implicit $mxcsr
   ; CHECK:   [[VMOVSSrm_alt1:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.1, $noreg :: (load 4 from constant-pool)
-  ; CHECK:   %7:fr32 = nofpexcept VMULSSrr [[VRSQRTSSr]], [[VMOVSSrm_alt1]], implicit $mxcsr
-  ; CHECK:   %8:fr32 = nofpexcept VMULSSrr killed %7, killed %5, implicit $mxcsr
-  ; CHECK:   %9:fr32 = nofpexcept VMULSSrr [[COPY]], %8, implicit $mxcsr
-  ; CHECK:   %10:fr32 = nofpexcept VFMADD213SSr %8, %9, [[VMOVSSrm_alt]], implicit $mxcsr
-  ; CHECK:   %11:fr32 = nofpexcept VMULSSrr %9, [[VMOVSSrm_alt1]], implicit $mxcsr
-  ; CHECK:   %12:fr32 = nofpexcept VMULSSrr killed %11, killed %10, implicit $mxcsr
+  ; CHECK:   %7:fr32 = ninf nofpexcept VMULSSrr [[VRSQRTSSr]], [[VMOVSSrm_alt1]], implicit $mxcsr
+  ; CHECK:   %8:fr32 = ninf nofpexcept VMULSSrr killed %7, killed %5, implicit $mxcsr
+  ; CHECK:   %9:fr32 = ninf nofpexcept VMULSSrr [[COPY]], %8, implicit $mxcsr
+  ; CHECK:   %10:fr32 = ninf nofpexcept VFMADD213SSr %8, %9, [[VMOVSSrm_alt]], implicit $mxcsr
+  ; CHECK:   %11:fr32 = ninf nofpexcept VMULSSrr %9, [[VMOVSSrm_alt1]], implicit $mxcsr
+  ; CHECK:   %12:fr32 = ninf nofpexcept VMULSSrr killed %11, killed %10, implicit $mxcsr
   ; CHECK:   [[COPY1:%[0-9]+]]:vr128 = COPY %12
   ; CHECK:   [[FsFLD0SS:%[0-9]+]]:fr32 = FsFLD0SS
   ; CHECK:   %15:fr32 = nofpexcept VCMPSSrr [[COPY]], killed [[FsFLD0SS]], 0, implicit $mxcsr
@@ -60,7 +86,7 @@ define float @sqrt_daz(float %f) #1 {
   ; CHECK:   [[COPY3:%[0-9]+]]:fr32 = COPY [[VPANDNrr]]
   ; CHECK:   $xmm0 = COPY [[COPY3]]
   ; CHECK:   RET 0, $xmm0
-  %call = tail call float @llvm.sqrt.f32(float %f)
+  %call = tail call ninf float @llvm.sqrt.f32(float %f)
   ret float %call
 }
 

diff --git a/llvm/test/CodeGen/X86/sqrt-fastmath.ll b/llvm/test/CodeGen/X86/sqrt-fastmath.ll
index f10199ce958f..b2593bc43578 100644
--- a/llvm/test/CodeGen/X86/sqrt-fastmath.ll
+++ b/llvm/test/CodeGen/X86/sqrt-fastmath.ll
@@ -59,6 +59,20 @@ define float @finite_f32_no_estimate(float %f) #0 {
 define float @finite_f32_estimate_ieee(float %f) #1 {
 ; SSE-LABEL: finite_f32_estimate_ieee:
 ; SSE:       # %bb.0:
+; SSE-NEXT:    sqrtss %xmm0, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: finite_f32_estimate_ieee:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %call = tail call float @__sqrtf_finite(float %f) #2
+  ret float %call
+}
+
+define float @finite_f32_estimate_ieee_ninf(float %f) #1 {
+; SSE-LABEL: finite_f32_estimate_ieee_ninf:
+; SSE:       # %bb.0:
 ; SSE-NEXT:    rsqrtss %xmm0, %xmm1
 ; SSE-NEXT:    movaps %xmm0, %xmm2
 ; SSE-NEXT:    mulss %xmm1, %xmm2
@@ -72,7 +86,7 @@ define float @finite_f32_estimate_ieee(float %f) #1 {
 ; SSE-NEXT:    andnps %xmm2, %xmm0
 ; SSE-NEXT:    retq
 ;
-; AVX1-LABEL: finite_f32_estimate_ieee:
+; AVX1-LABEL: finite_f32_estimate_ieee_ninf:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vrsqrtss %xmm0, %xmm0, %xmm1
 ; AVX1-NEXT:    vmulss %xmm1, %xmm0, %xmm2
@@ -85,7 +99,7 @@ define float @finite_f32_estimate_ieee(float %f) #1 {
 ; AVX1-NEXT:    vandnps %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    retq
 ;
-; AVX512-LABEL: finite_f32_estimate_ieee:
+; AVX512-LABEL: finite_f32_estimate_ieee_ninf:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vrsqrtss %xmm0, %xmm0, %xmm1
 ; AVX512-NEXT:    vmulss %xmm1, %xmm0, %xmm2
@@ -99,13 +113,27 @@ define float @finite_f32_estimate_ieee(float %f) #1 {
 ; AVX512-NEXT:    vmovss %xmm0, %xmm1, %xmm1 {%k1}
 ; AVX512-NEXT:    vmovaps %xmm1, %xmm0
 ; AVX512-NEXT:    retq
-  %call = tail call float @__sqrtf_finite(float %f) #2
+  %call = tail call ninf float @__sqrtf_finite(float %f) #2
   ret float %call
 }
 
 define float @finite_f32_estimate_daz(float %f) #4 {
 ; SSE-LABEL: finite_f32_estimate_daz:
 ; SSE:       # %bb.0:
+; SSE-NEXT:    sqrtss %xmm0, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: finite_f32_estimate_daz:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %call = tail call float @__sqrtf_finite(float %f) #2
+  ret float %call
+}
+
+define float @finite_f32_estimate_daz_ninf(float %f) #4 {
+; SSE-LABEL: finite_f32_estimate_daz_ninf:
+; SSE:       # %bb.0:
 ; SSE-NEXT:    rsqrtss %xmm0, %xmm1
 ; SSE-NEXT:    movaps %xmm0, %xmm2
 ; SSE-NEXT:    mulss %xmm1, %xmm2
@@ -119,7 +147,7 @@ define float @finite_f32_estimate_daz(float %f) #4 {
 ; SSE-NEXT:    andnps %xmm2, %xmm0
 ; SSE-NEXT:    retq
 ;
-; AVX1-LABEL: finite_f32_estimate_daz:
+; AVX1-LABEL: finite_f32_estimate_daz_ninf:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vrsqrtss %xmm0, %xmm0, %xmm1
 ; AVX1-NEXT:    vmulss %xmm1, %xmm0, %xmm2
@@ -132,7 +160,7 @@ define float @finite_f32_estimate_daz(float %f) #4 {
 ; AVX1-NEXT:    vandnps %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    retq
 ;
-; AVX512-LABEL: finite_f32_estimate_daz:
+; AVX512-LABEL: finite_f32_estimate_daz_ninf:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vrsqrtss %xmm0, %xmm0, %xmm1
 ; AVX512-NEXT:    vmulss %xmm1, %xmm0, %xmm2
@@ -144,7 +172,7 @@ define float @finite_f32_estimate_daz(float %f) #4 {
 ; AVX512-NEXT:    vmovss %xmm2, %xmm1, %xmm1 {%k1}
 ; AVX512-NEXT:    vmovaps %xmm1, %xmm0
 ; AVX512-NEXT:    retq
-  %call = tail call float @__sqrtf_finite(float %f) #2
+  %call = tail call ninf float @__sqrtf_finite(float %f) #2
   ret float %call
 }
 
@@ -175,6 +203,20 @@ define x86_fp80 @finite_f80_estimate_but_no(x86_fp80 %ld) #1 {
 define float @sqrtf_check_denorms(float %x) #3 {
 ; SSE-LABEL: sqrtf_check_denorms:
 ; SSE:       # %bb.0:
+; SSE-NEXT:    sqrtss %xmm0, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: sqrtf_check_denorms:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %call = tail call float @__sqrtf_finite(float %x) #2
+  ret float %call
+}
+
+define float @sqrtf_check_denorms_ninf(float %x) #3 {
+; SSE-LABEL: sqrtf_check_denorms_ninf:
+; SSE:       # %bb.0:
 ; SSE-NEXT:    rsqrtss %xmm0, %xmm1
 ; SSE-NEXT:    movaps %xmm0, %xmm2
 ; SSE-NEXT:    mulss %xmm1, %xmm2
@@ -188,7 +230,7 @@ define float @sqrtf_check_denorms(float %x) #3 {
 ; SSE-NEXT:    andnps %xmm2, %xmm0
 ; SSE-NEXT:    retq
 ;
-; AVX1-LABEL: sqrtf_check_denorms:
+; AVX1-LABEL: sqrtf_check_denorms_ninf:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vrsqrtss %xmm0, %xmm0, %xmm1
 ; AVX1-NEXT:    vmulss %xmm1, %xmm0, %xmm2
@@ -201,7 +243,7 @@ define float @sqrtf_check_denorms(float %x) #3 {
 ; AVX1-NEXT:    vandnps %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    retq
 ;
-; AVX512-LABEL: sqrtf_check_denorms:
+; AVX512-LABEL: sqrtf_check_denorms_ninf:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vrsqrtss %xmm0, %xmm0, %xmm1
 ; AVX512-NEXT:    vmulss %xmm1, %xmm0, %xmm2
@@ -215,13 +257,27 @@ define float @sqrtf_check_denorms(float %x) #3 {
 ; AVX512-NEXT:    vmovss %xmm0, %xmm1, %xmm1 {%k1}
 ; AVX512-NEXT:    vmovaps %xmm1, %xmm0
 ; AVX512-NEXT:    retq
-  %call = tail call float @__sqrtf_finite(float %x) #2
+  %call = tail call ninf float @__sqrtf_finite(float %x) #2
   ret float %call
 }
 
 define <4 x float> @sqrt_v4f32_check_denorms(<4 x float> %x) #3 {
 ; SSE-LABEL: sqrt_v4f32_check_denorms:
 ; SSE:       # %bb.0:
+; SSE-NEXT:    sqrtps %xmm0, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: sqrt_v4f32_check_denorms:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vsqrtps %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %call = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> %x) #2
+  ret <4 x float> %call
+}
+
+define <4 x float> @sqrt_v4f32_check_denorms_ninf(<4 x float> %x) #3 {
+; SSE-LABEL: sqrt_v4f32_check_denorms_ninf:
+; SSE:       # %bb.0:
 ; SSE-NEXT:    rsqrtps %xmm0, %xmm2
 ; SSE-NEXT:    movaps %xmm0, %xmm1
 ; SSE-NEXT:    mulps %xmm2, %xmm1
@@ -237,7 +293,7 @@ define <4 x float> @sqrt_v4f32_check_denorms(<4 x float> %x) #3 {
 ; SSE-NEXT:    movaps %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
-; AVX1-LABEL: sqrt_v4f32_check_denorms:
+; AVX1-LABEL: sqrt_v4f32_check_denorms_ninf:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vrsqrtps %xmm0, %xmm1
 ; AVX1-NEXT:    vmulps %xmm1, %xmm0, %xmm2
@@ -251,7 +307,7 @@ define <4 x float> @sqrt_v4f32_check_denorms(<4 x float> %x) #3 {
 ; AVX1-NEXT:    vandps %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    retq
 ;
-; AVX512-LABEL: sqrt_v4f32_check_denorms:
+; AVX512-LABEL: sqrt_v4f32_check_denorms_ninf:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vrsqrtps %xmm0, %xmm1
 ; AVX512-NEXT:    vmulps %xmm1, %xmm0, %xmm2
@@ -266,7 +322,7 @@ define <4 x float> @sqrt_v4f32_check_denorms(<4 x float> %x) #3 {
 ; AVX512-NEXT:    vcmpleps %xmm0, %xmm2, %xmm0
 ; AVX512-NEXT:    vandps %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    retq
-  %call = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> %x) #2
+  %call = tail call ninf <4 x float> @llvm.sqrt.v4f32(<4 x float> %x) #2
   ret <4 x float> %call
 }
 