[llvm] 95bcab8 - [DAGCombiner] Require ninf for sqrt recip estimation
Qiu Chaofan via llvm-commits
llvm-commits@lists.llvm.org
Wed Apr 1 01:24:12 PDT 2020
Author: Qiu Chaofan
Date: 2020-04-01T16:23:43+08:00
New Revision: 95bcab8272ced7444bc25353784de96ad9375c02
URL: https://github.com/llvm/llvm-project/commit/95bcab8272ced7444bc25353784de96ad9375c02
DIFF: https://github.com/llvm/llvm-project/commit/95bcab8272ced7444bc25353784de96ad9375c02.diff
LOG: [DAGCombiner] Require ninf for sqrt recip estimation
Currently, the DAG combiner uses (fmul (rsqrt x) x) to estimate the
square root of x. However, this estimate returns NaN when x is +Inf:
rsqrt(+Inf) is 0, and 0 * +Inf is NaN, whereas the correct result is
+Inf.
Reviewed By: spatel
Differential Revision: https://reviews.llvm.org/D76853
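As a minimal illustration (a standalone C++ sketch, not part of the
patch), the x * rsqrt(x) estimate misbehaves at +Inf exactly as the
new comment in visitFSQRT describes:

  #include <cmath>
  #include <cstdio>
  #include <limits>

  int main() {
    float x = std::numeric_limits<float>::infinity();
    // The reciprocal square root of +Inf is 0; hardware estimate
    // instructions such as x86 RSQRTSS also return 0 for +Inf.
    float rsqrt = 1.0f / std::sqrt(x);
    // The estimate multiplies back by x: +Inf * 0 == NaN,
    // but the correct result is sqrt(+Inf) == +Inf.
    float est = x * rsqrt;
    std::printf("sqrt = %f, estimate = %f\n", std::sqrt(x), est);
    return 0;
  }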
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/test/CodeGen/NVPTX/fast-math.ll
llvm/test/CodeGen/NVPTX/sqrt-approx.ll
llvm/test/CodeGen/PowerPC/fmf-propagation.ll
llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll
llvm/test/CodeGen/X86/sqrt-fastmath.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index df4ec4cac1a3..a74060249d4b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -13109,8 +13109,12 @@ SDValue DAGCombiner::visitFREM(SDNode *N) {
SDValue DAGCombiner::visitFSQRT(SDNode *N) {
SDNodeFlags Flags = N->getFlags();
- if (!DAG.getTarget().Options.UnsafeFPMath &&
- !Flags.hasApproximateFuncs())
+ const TargetOptions &Options = DAG.getTarget().Options;
+
+ // Require 'ninf' flag since sqrt(+Inf) = +Inf, but the estimation goes as:
+ // sqrt(+Inf) == rsqrt(+Inf) * +Inf = 0 * +Inf = NaN
+ if ((!Options.UnsafeFPMath && !Flags.hasApproximateFuncs()) ||
+ (!Options.NoInfsFPMath && !Flags.hasNoInfs()))
return SDValue();
SDValue N0 = N->getOperand(0);
diff --git a/llvm/test/CodeGen/NVPTX/fast-math.ll b/llvm/test/CodeGen/NVPTX/fast-math.ll
index 900521664e0c..db5fb63f4e76 100644
--- a/llvm/test/CodeGen/NVPTX/fast-math.ll
+++ b/llvm/test/CodeGen/NVPTX/fast-math.ll
@@ -13,7 +13,7 @@ define float @sqrt_div(float %a, float %b) {
}
; CHECK-LABEL: sqrt_div_fast(
-; CHECK: sqrt.approx.f32
+; CHECK: sqrt.rn.f32
; CHECK: div.approx.f32
define float @sqrt_div_fast(float %a, float %b) #0 {
%t1 = tail call float @llvm.sqrt.f32(float %a)
@@ -21,6 +21,15 @@ define float @sqrt_div_fast(float %a, float %b) #0 {
ret float %t2
}
+; CHECK-LABEL: sqrt_div_fast_ninf(
+; CHECK: sqrt.approx.f32
+; CHECK: div.approx.f32
+define float @sqrt_div_fast_ninf(float %a, float %b) #0 {
+ %t1 = tail call ninf float @llvm.sqrt.f32(float %a)
+ %t2 = fdiv float %t1, %b
+ ret float %t2
+}
+
; CHECK-LABEL: sqrt_div_ftz(
; CHECK: sqrt.rn.ftz.f32
; CHECK: div.rn.ftz.f32
@@ -31,7 +40,7 @@ define float @sqrt_div_ftz(float %a, float %b) #1 {
}
; CHECK-LABEL: sqrt_div_fast_ftz(
-; CHECK: sqrt.approx.ftz.f32
+; CHECK: sqrt.rn.ftz.f32
; CHECK: div.approx.ftz.f32
define float @sqrt_div_fast_ftz(float %a, float %b) #0 #1 {
%t1 = tail call float @llvm.sqrt.f32(float %a)
@@ -39,12 +48,20 @@ define float @sqrt_div_fast_ftz(float %a, float %b) #0 #1 {
ret float %t2
}
+; CHECK-LABEL: sqrt_div_fast_ftz_ninf(
+; CHECK: sqrt.approx.ftz.f32
+; CHECK: div.approx.ftz.f32
+define float @sqrt_div_fast_ftz_ninf(float %a, float %b) #0 #1 {
+ %t1 = tail call ninf float @llvm.sqrt.f32(float %a)
+ %t2 = fdiv float %t1, %b
+ ret float %t2
+}
+
; There are no fast-math or ftz versions of sqrt and div for f64. We use
; reciprocal(rsqrt(x)) for sqrt(x), and emit a vanilla divide.
; CHECK-LABEL: sqrt_div_fast_ftz_f64(
-; CHECK: rsqrt.approx.f64
-; CHECK: rcp.approx.ftz.f64
+; CHECK: sqrt.rn.f64
; CHECK: div.rn.f64
define double @sqrt_div_fast_ftz_f64(double %a, double %b) #0 #1 {
%t1 = tail call double @llvm.sqrt.f64(double %a)
@@ -52,6 +69,16 @@ define double @sqrt_div_fast_ftz_f64(double %a, double %b) #0 #1 {
ret double %t2
}
+; CHECK-LABEL: sqrt_div_fast_ftz_f64_ninf(
+; CHECK: rsqrt.approx.f64
+; CHECK: rcp.approx.ftz.f64
+; CHECK: div.rn.f64
+define double @sqrt_div_fast_ftz_f64_ninf(double %a, double %b) #0 #1 {
+ %t1 = tail call ninf double @llvm.sqrt.f64(double %a)
+ %t2 = fdiv double %t1, %b
+ ret double %t2
+}
+
; CHECK-LABEL: rsqrt(
; CHECK-NOT: rsqrt.approx
; CHECK: sqrt.rn.f32
diff --git a/llvm/test/CodeGen/NVPTX/sqrt-approx.ll b/llvm/test/CodeGen/NVPTX/sqrt-approx.ll
index a8590b7c43ab..465b696c7610 100644
--- a/llvm/test/CodeGen/NVPTX/sqrt-approx.ll
+++ b/llvm/test/CodeGen/NVPTX/sqrt-approx.ll
@@ -45,35 +45,63 @@ define double @test_rsqrt64_ftz(double %a) #0 #1 {
; CHECK-LABEL test_sqrt32
define float @test_sqrt32(float %a) #0 {
-; CHECK: sqrt.approx.f32
+; CHECK: sqrt.rn.f32
%ret = tail call float @llvm.sqrt.f32(float %a)
ret float %ret
}
+; CHECK-LABEL test_sqrt32_ninf
+define float @test_sqrt32_ninf(float %a) #0 {
+; CHECK: sqrt.approx.f32
+ %ret = tail call ninf float @llvm.sqrt.f32(float %a)
+ ret float %ret
+}
+
; CHECK-LABEL test_sqrt_ftz
define float @test_sqrt_ftz(float %a) #0 #1 {
-; CHECK: sqrt.approx.ftz.f32
+; CHECK: sqrt.rn.ftz.f32
%ret = tail call float @llvm.sqrt.f32(float %a)
ret float %ret
}
+; CHECK-LABEL test_sqrt_ftz_ninf
+define float @test_sqrt_ftz_ninf(float %a) #0 #1 {
+; CHECK: sqrt.approx.ftz.f32
+ %ret = tail call ninf float @llvm.sqrt.f32(float %a)
+ ret float %ret
+}
+
; CHECK-LABEL test_sqrt64
define double @test_sqrt64(double %a) #0 {
+; CHECK: sqrt.rn.f64
+ %ret = tail call double @llvm.sqrt.f64(double %a)
+ ret double %ret
+}
+
+; CHECK-LABEL test_sqrt64_ninf
+define double @test_sqrt64_ninf(double %a) #0 {
; There's no sqrt.approx.f64 instruction; we emit
; reciprocal(rsqrt.approx.f64(x)). There's no non-ftz approximate reciprocal,
; so we just use the ftz version.
; CHECK: rsqrt.approx.f64
; CHECK: rcp.approx.ftz.f64
- %ret = tail call double @llvm.sqrt.f64(double %a)
+ %ret = tail call ninf double @llvm.sqrt.f64(double %a)
ret double %ret
}
; CHECK-LABEL test_sqrt64_ftz
define double @test_sqrt64_ftz(double %a) #0 #1 {
+; CHECK: sqrt.rn.f64
+ %ret = tail call double @llvm.sqrt.f64(double %a)
+ ret double %ret
+}
+
+; CHECK-LABEL test_sqrt64_ftz_ninf
+define double @test_sqrt64_ftz_ninf(double %a) #0 #1 {
; There's no sqrt.approx.ftz.f64 instruction; we just use the non-ftz version.
; CHECK: rsqrt.approx.f64
; CHECK: rcp.approx.ftz.f64
- %ret = tail call double @llvm.sqrt.f64(double %a)
+ %ret = tail call ninf double @llvm.sqrt.f64(double %a)
ret double %ret
}
@@ -92,11 +120,18 @@ define float @test_rsqrt32_refined(float %a) #0 #2 {
; CHECK-LABEL: test_sqrt32_refined
define float @test_sqrt32_refined(float %a) #0 #2 {
-; CHECK: rsqrt.approx.f32
+; CHECK: sqrt.rn.f32
%ret = tail call float @llvm.sqrt.f32(float %a)
ret float %ret
}
+; CHECK-LABEL: test_sqrt32_refined_ninf
+define float @test_sqrt32_refined_ninf(float %a) #0 #2 {
+; CHECK: rsqrt.approx.f32
+ %ret = tail call ninf float @llvm.sqrt.f32(float %a)
+ ret float %ret
+}
+
; CHECK-LABEL: test_rsqrt64_refined
define double @test_rsqrt64_refined(double %a) #0 #2 {
; CHECK: rsqrt.approx.f64
@@ -107,11 +142,18 @@ define double @test_rsqrt64_refined(double %a) #0 #2 {
; CHECK-LABEL: test_sqrt64_refined
define double @test_sqrt64_refined(double %a) #0 #2 {
-; CHECK: rsqrt.approx.f64
+; CHECK: sqrt.rn.f64
%ret = tail call double @llvm.sqrt.f64(double %a)
ret double %ret
}
+; CHECK-LABEL: test_sqrt64_refined_ninf
+define double @test_sqrt64_refined_ninf(double %a) #0 #2 {
+; CHECK: rsqrt.approx.f64
+ %ret = tail call ninf double @llvm.sqrt.f64(double %a)
+ ret double %ret
+}
+
; -- refined sqrt and rsqrt with ftz enabled --
; CHECK-LABEL: test_rsqrt32_refined_ftz
@@ -124,11 +166,18 @@ define float @test_rsqrt32_refined_ftz(float %a) #0 #1 #2 {
; CHECK-LABEL: test_sqrt32_refined_ftz
define float @test_sqrt32_refined_ftz(float %a) #0 #1 #2 {
-; CHECK: rsqrt.approx.ftz.f32
+; CHECK: sqrt.rn.ftz.f32
%ret = tail call float @llvm.sqrt.f32(float %a)
ret float %ret
}
+; CHECK-LABEL: test_sqrt32_refined_ftz_ninf
+define float @test_sqrt32_refined_ftz_ninf(float %a) #0 #1 #2 {
+; CHECK: rsqrt.approx.ftz.f32
+ %ret = tail call ninf float @llvm.sqrt.f32(float %a)
+ ret float %ret
+}
+
; CHECK-LABEL: test_rsqrt64_refined_ftz
define double @test_rsqrt64_refined_ftz(double %a) #0 #1 #2 {
; There's no rsqrt.approx.ftz.f64, so we just use the non-ftz version.
@@ -140,11 +189,18 @@ define double @test_rsqrt64_refined_ftz(double %a) #0 #1 #2 {
; CHECK-LABEL: test_sqrt64_refined_ftz
define double @test_sqrt64_refined_ftz(double %a) #0 #1 #2 {
-; CHECK: rsqrt.approx.f64
+; CHECK: sqrt.rn.f64
%ret = tail call double @llvm.sqrt.f64(double %a)
ret double %ret
}
+; CHECK-LABEL: test_sqrt64_refined_ftz_ninf
+define double @test_sqrt64_refined_ftz_ninf(double %a) #0 #1 #2 {
+; CHECK: rsqrt.approx.f64
+ %ret = tail call ninf double @llvm.sqrt.f64(double %a)
+ ret double %ret
+}
+
attributes #0 = { "unsafe-fp-math" = "true" }
attributes #1 = { "denormal-fp-math-f32" = "preserve-sign,preserve-sign" }
attributes #2 = { "reciprocal-estimates" = "rsqrtf:1,rsqrtd:1,sqrtf:1,sqrtd:1" }
diff --git a/llvm/test/CodeGen/PowerPC/fmf-propagation.ll b/llvm/test/CodeGen/PowerPC/fmf-propagation.ll
index 222583638d59..59a7d233c0c3 100644
--- a/llvm/test/CodeGen/PowerPC/fmf-propagation.ll
+++ b/llvm/test/CodeGen/PowerPC/fmf-propagation.ll
@@ -270,11 +270,11 @@ define float @fmul_fma_fast2(float %x) {
; Reduced precision for sqrt is allowed - should use estimate and NR iterations.
; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_afn_ieee:'
-; FMFDEBUG: fmul afn {{t[0-9]+}}
+; FMFDEBUG: fmul ninf afn {{t[0-9]+}}
; FMFDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_afn_ieee:'
; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_afn_ieee:'
-; GLOBALDEBUG: fmul afn {{t[0-9]+}}
+; GLOBALDEBUG: fmul ninf afn {{t[0-9]+}}
; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_afn_ieee:'
define float @sqrt_afn_ieee(float %x) #0 {
@@ -321,17 +321,31 @@ define float @sqrt_afn_ieee(float %x) #0 {
; GLOBAL-NEXT: xsmulsp 0, 0, 2
; GLOBAL-NEXT: .LBB10_2:
; GLOBAL-NEXT: fmr 1, 0
+; GLOBAL-NEXT: blr
+ %rt = call afn ninf float @llvm.sqrt.f32(float %x)
+ ret float %rt
+}
+
+define float @sqrt_afn_ieee_inf(float %x) #0 {
+; FMF-LABEL: sqrt_afn_ieee_inf:
+; FMF: # %bb.0:
+; FMF-NEXT: xssqrtsp 1, 1
+; FMF-NEXT: blr
+;
+; GLOBAL-LABEL: sqrt_afn_ieee_inf:
+; GLOBAL: # %bb.0:
+; GLOBAL-NEXT: xssqrtsp 1, 1
; GLOBAL-NEXT: blr
%rt = call afn float @llvm.sqrt.f32(float %x)
ret float %rt
}
; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_afn_preserve_sign:'
-; FMFDEBUG: fmul afn {{t[0-9]+}}
+; FMFDEBUG: fmul ninf afn {{t[0-9]+}}
; FMFDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_afn_preserve_sign:'
; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_afn_preserve_sign:'
-; GLOBALDEBUG: fmul afn {{t[0-9]+}}
+; GLOBALDEBUG: fmul ninf afn {{t[0-9]+}}
; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_afn_preserve_sign:'
define float @sqrt_afn_preserve_sign(float %x) #1 {
@@ -339,19 +353,19 @@ define float @sqrt_afn_preserve_sign(float %x) #1 {
; FMF: # %bb.0:
; FMF-NEXT: xxlxor 0, 0, 0
; FMF-NEXT: fcmpu 0, 1, 0
-; FMF-NEXT: beq 0, .LBB11_2
+; FMF-NEXT: beq 0, .LBB12_2
; FMF-NEXT: # %bb.1:
; FMF-NEXT: xsrsqrtesp 0, 1
-; FMF-NEXT: addis 3, 2, .LCPI11_0@toc@ha
-; FMF-NEXT: addis 4, 2, .LCPI11_1@toc@ha
-; FMF-NEXT: lfs 2, .LCPI11_0@toc@l(3)
-; FMF-NEXT: lfs 3, .LCPI11_1@toc@l(4)
+; FMF-NEXT: addis 3, 2, .LCPI12_0@toc@ha
+; FMF-NEXT: addis 4, 2, .LCPI12_1@toc@ha
+; FMF-NEXT: lfs 2, .LCPI12_0@toc@l(3)
+; FMF-NEXT: lfs 3, .LCPI12_1@toc@l(4)
; FMF-NEXT: xsmulsp 1, 1, 0
; FMF-NEXT: xsmulsp 0, 1, 0
; FMF-NEXT: xsmulsp 1, 1, 2
; FMF-NEXT: xsaddsp 0, 0, 3
; FMF-NEXT: xsmulsp 0, 1, 0
-; FMF-NEXT: .LBB11_2:
+; FMF-NEXT: .LBB12_2:
; FMF-NEXT: fmr 1, 0
; FMF-NEXT: blr
;
@@ -359,19 +373,33 @@ define float @sqrt_afn_preserve_sign(float %x) #1 {
; GLOBAL: # %bb.0:
; GLOBAL-NEXT: xxlxor 0, 0, 0
; GLOBAL-NEXT: fcmpu 0, 1, 0
-; GLOBAL-NEXT: beq 0, .LBB11_2
+; GLOBAL-NEXT: beq 0, .LBB12_2
; GLOBAL-NEXT: # %bb.1:
; GLOBAL-NEXT: xsrsqrtesp 0, 1
-; GLOBAL-NEXT: addis 3, 2, .LCPI11_0@toc@ha
-; GLOBAL-NEXT: addis 4, 2, .LCPI11_1@toc@ha
-; GLOBAL-NEXT: lfs 2, .LCPI11_0@toc@l(3)
-; GLOBAL-NEXT: lfs 3, .LCPI11_1@toc@l(4)
+; GLOBAL-NEXT: addis 3, 2, .LCPI12_0@toc@ha
+; GLOBAL-NEXT: addis 4, 2, .LCPI12_1@toc@ha
+; GLOBAL-NEXT: lfs 2, .LCPI12_0@toc@l(3)
+; GLOBAL-NEXT: lfs 3, .LCPI12_1@toc@l(4)
; GLOBAL-NEXT: xsmulsp 1, 1, 0
; GLOBAL-NEXT: xsmaddasp 2, 1, 0
; GLOBAL-NEXT: xsmulsp 0, 1, 3
; GLOBAL-NEXT: xsmulsp 0, 0, 2
-; GLOBAL-NEXT: .LBB11_2:
+; GLOBAL-NEXT: .LBB12_2:
; GLOBAL-NEXT: fmr 1, 0
+; GLOBAL-NEXT: blr
+ %rt = call afn ninf float @llvm.sqrt.f32(float %x)
+ ret float %rt
+}
+
+define float @sqrt_afn_preserve_sign_inf(float %x) #1 {
+; FMF-LABEL: sqrt_afn_preserve_sign_inf:
+; FMF: # %bb.0:
+; FMF-NEXT: xssqrtsp 1, 1
+; FMF-NEXT: blr
+;
+; GLOBAL-LABEL: sqrt_afn_preserve_sign_inf:
+; GLOBAL: # %bb.0:
+; GLOBAL-NEXT: xssqrtsp 1, 1
; GLOBAL-NEXT: blr
%rt = call afn float @llvm.sqrt.f32(float %x)
ret float %rt
@@ -390,45 +418,45 @@ define float @sqrt_afn_preserve_sign(float %x) #1 {
define float @sqrt_fast_ieee(float %x) #0 {
; FMF-LABEL: sqrt_fast_ieee:
; FMF: # %bb.0:
-; FMF-NEXT: addis 3, 2, .LCPI12_2@toc@ha
+; FMF-NEXT: addis 3, 2, .LCPI14_2@toc@ha
; FMF-NEXT: fabs 0, 1
-; FMF-NEXT: lfs 2, .LCPI12_2@toc@l(3)
+; FMF-NEXT: lfs 2, .LCPI14_2@toc@l(3)
; FMF-NEXT: fcmpu 0, 0, 2
; FMF-NEXT: xxlxor 0, 0, 0
-; FMF-NEXT: blt 0, .LBB12_2
+; FMF-NEXT: blt 0, .LBB14_2
; FMF-NEXT: # %bb.1:
; FMF-NEXT: xsrsqrtesp 0, 1
-; FMF-NEXT: addis 3, 2, .LCPI12_0@toc@ha
-; FMF-NEXT: addis 4, 2, .LCPI12_1@toc@ha
-; FMF-NEXT: lfs 2, .LCPI12_0@toc@l(3)
-; FMF-NEXT: lfs 3, .LCPI12_1@toc@l(4)
+; FMF-NEXT: addis 3, 2, .LCPI14_0@toc@ha
+; FMF-NEXT: addis 4, 2, .LCPI14_1@toc@ha
+; FMF-NEXT: lfs 2, .LCPI14_0@toc@l(3)
+; FMF-NEXT: lfs 3, .LCPI14_1@toc@l(4)
; FMF-NEXT: xsmulsp 1, 1, 0
; FMF-NEXT: xsmaddasp 2, 1, 0
; FMF-NEXT: xsmulsp 0, 1, 3
; FMF-NEXT: xsmulsp 0, 0, 2
-; FMF-NEXT: .LBB12_2:
+; FMF-NEXT: .LBB14_2:
; FMF-NEXT: fmr 1, 0
; FMF-NEXT: blr
;
; GLOBAL-LABEL: sqrt_fast_ieee:
; GLOBAL: # %bb.0:
-; GLOBAL-NEXT: addis 3, 2, .LCPI12_2@toc@ha
+; GLOBAL-NEXT: addis 3, 2, .LCPI14_2@toc@ha
; GLOBAL-NEXT: fabs 0, 1
-; GLOBAL-NEXT: lfs 2, .LCPI12_2@toc@l(3)
+; GLOBAL-NEXT: lfs 2, .LCPI14_2@toc@l(3)
; GLOBAL-NEXT: fcmpu 0, 0, 2
; GLOBAL-NEXT: xxlxor 0, 0, 0
-; GLOBAL-NEXT: blt 0, .LBB12_2
+; GLOBAL-NEXT: blt 0, .LBB14_2
; GLOBAL-NEXT: # %bb.1:
; GLOBAL-NEXT: xsrsqrtesp 0, 1
-; GLOBAL-NEXT: addis 3, 2, .LCPI12_0@toc@ha
-; GLOBAL-NEXT: addis 4, 2, .LCPI12_1@toc@ha
-; GLOBAL-NEXT: lfs 2, .LCPI12_0@toc@l(3)
-; GLOBAL-NEXT: lfs 3, .LCPI12_1@toc@l(4)
+; GLOBAL-NEXT: addis 3, 2, .LCPI14_0@toc@ha
+; GLOBAL-NEXT: addis 4, 2, .LCPI14_1@toc@ha
+; GLOBAL-NEXT: lfs 2, .LCPI14_0@toc@l(3)
+; GLOBAL-NEXT: lfs 3, .LCPI14_1@toc@l(4)
; GLOBAL-NEXT: xsmulsp 1, 1, 0
; GLOBAL-NEXT: xsmaddasp 2, 1, 0
; GLOBAL-NEXT: xsmulsp 0, 1, 3
; GLOBAL-NEXT: xsmulsp 0, 0, 2
-; GLOBAL-NEXT: .LBB12_2:
+; GLOBAL-NEXT: .LBB14_2:
; GLOBAL-NEXT: fmr 1, 0
; GLOBAL-NEXT: blr
%rt = call fast float @llvm.sqrt.f32(float %x)
@@ -450,18 +478,18 @@ define float @sqrt_fast_preserve_sign(float %x) #1 {
; FMF: # %bb.0:
; FMF-NEXT: xxlxor 0, 0, 0
; FMF-NEXT: fcmpu 0, 1, 0
-; FMF-NEXT: beq 0, .LBB13_2
+; FMF-NEXT: beq 0, .LBB15_2
; FMF-NEXT: # %bb.1:
; FMF-NEXT: xsrsqrtesp 0, 1
-; FMF-NEXT: addis 3, 2, .LCPI13_0@toc@ha
-; FMF-NEXT: addis 4, 2, .LCPI13_1@toc@ha
-; FMF-NEXT: lfs 2, .LCPI13_0@toc@l(3)
-; FMF-NEXT: lfs 3, .LCPI13_1@toc@l(4)
+; FMF-NEXT: addis 3, 2, .LCPI15_0@toc@ha
+; FMF-NEXT: addis 4, 2, .LCPI15_1@toc@ha
+; FMF-NEXT: lfs 2, .LCPI15_0@toc@l(3)
+; FMF-NEXT: lfs 3, .LCPI15_1@toc@l(4)
; FMF-NEXT: xsmulsp 1, 1, 0
; FMF-NEXT: xsmaddasp 2, 1, 0
; FMF-NEXT: xsmulsp 0, 1, 3
; FMF-NEXT: xsmulsp 0, 0, 2
-; FMF-NEXT: .LBB13_2:
+; FMF-NEXT: .LBB15_2:
; FMF-NEXT: fmr 1, 0
; FMF-NEXT: blr
;
@@ -469,18 +497,18 @@ define float @sqrt_fast_preserve_sign(float %x) #1 {
; GLOBAL: # %bb.0:
; GLOBAL-NEXT: xxlxor 0, 0, 0
; GLOBAL-NEXT: fcmpu 0, 1, 0
-; GLOBAL-NEXT: beq 0, .LBB13_2
+; GLOBAL-NEXT: beq 0, .LBB15_2
; GLOBAL-NEXT: # %bb.1:
; GLOBAL-NEXT: xsrsqrtesp 0, 1
-; GLOBAL-NEXT: addis 3, 2, .LCPI13_0@toc@ha
-; GLOBAL-NEXT: addis 4, 2, .LCPI13_1@toc@ha
-; GLOBAL-NEXT: lfs 2, .LCPI13_0@toc@l(3)
-; GLOBAL-NEXT: lfs 3, .LCPI13_1@toc@l(4)
+; GLOBAL-NEXT: addis 3, 2, .LCPI15_0@toc@ha
+; GLOBAL-NEXT: addis 4, 2, .LCPI15_1@toc@ha
+; GLOBAL-NEXT: lfs 2, .LCPI15_0@toc@l(3)
+; GLOBAL-NEXT: lfs 3, .LCPI15_1@toc@l(4)
; GLOBAL-NEXT: xsmulsp 1, 1, 0
; GLOBAL-NEXT: xsmaddasp 2, 1, 0
; GLOBAL-NEXT: xsmulsp 0, 1, 3
; GLOBAL-NEXT: xsmulsp 0, 0, 2
-; GLOBAL-NEXT: .LBB13_2:
+; GLOBAL-NEXT: .LBB15_2:
; GLOBAL-NEXT: fmr 1, 0
; GLOBAL-NEXT: blr
%rt = call fast float @llvm.sqrt.f32(float %x)
@@ -502,10 +530,10 @@ define double @fcmp_nnan(double %a, double %y, double %z) {
; FMF: # %bb.0:
; FMF-NEXT: xxlxor 0, 0, 0
; FMF-NEXT: xscmpudp 0, 1, 0
-; FMF-NEXT: blt 0, .LBB14_2
+; FMF-NEXT: blt 0, .LBB16_2
; FMF-NEXT: # %bb.1:
; FMF-NEXT: fmr 3, 2
-; FMF-NEXT: .LBB14_2:
+; FMF-NEXT: .LBB16_2:
; FMF-NEXT: fmr 1, 3
; FMF-NEXT: blr
;
@@ -513,10 +541,10 @@ define double @fcmp_nnan(double %a, double %y, double %z) {
; GLOBAL: # %bb.0:
; GLOBAL-NEXT: xxlxor 0, 0, 0
; GLOBAL-NEXT: xscmpudp 0, 1, 0
-; GLOBAL-NEXT: blt 0, .LBB14_2
+; GLOBAL-NEXT: blt 0, .LBB16_2
; GLOBAL-NEXT: # %bb.1:
; GLOBAL-NEXT: fmr 3, 2
-; GLOBAL-NEXT: .LBB14_2:
+; GLOBAL-NEXT: .LBB16_2:
; GLOBAL-NEXT: fmr 1, 3
; GLOBAL-NEXT: blr
%cmp = fcmp nnan ult double %a, 0.0
diff --git a/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll b/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll
index 7be19c07da80..4483de105385 100644
--- a/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll
+++ b/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll
@@ -9,17 +9,30 @@ define float @sqrt_ieee(float %f) #0 {
; CHECK: liveins: $xmm0
; CHECK: [[COPY:%[0-9]+]]:fr32 = COPY $xmm0
; CHECK: [[DEF:%[0-9]+]]:fr32 = IMPLICIT_DEF
+ ; CHECK: %1:fr32 = nofpexcept VSQRTSSr killed [[DEF]], [[COPY]], implicit $mxcsr
+ ; CHECK: $xmm0 = COPY %1
+ ; CHECK: RET 0, $xmm0
+ %call = tail call float @llvm.sqrt.f32(float %f)
+ ret float %call
+}
+
+define float @sqrt_ieee_ninf(float %f) #0 {
+ ; CHECK-LABEL: name: sqrt_ieee_ninf
+ ; CHECK: bb.0 (%ir-block.0):
+ ; CHECK: liveins: $xmm0
+ ; CHECK: [[COPY:%[0-9]+]]:fr32 = COPY $xmm0
+ ; CHECK: [[DEF:%[0-9]+]]:fr32 = IMPLICIT_DEF
; CHECK: [[VRSQRTSSr:%[0-9]+]]:fr32 = VRSQRTSSr killed [[DEF]], [[COPY]]
- ; CHECK: %3:fr32 = nofpexcept VMULSSrr [[COPY]], [[VRSQRTSSr]], implicit $mxcsr
+ ; CHECK: %3:fr32 = ninf nofpexcept VMULSSrr [[COPY]], [[VRSQRTSSr]], implicit $mxcsr
; CHECK: [[VMOVSSrm_alt:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.0, $noreg :: (load 4 from constant-pool)
- ; CHECK: %5:fr32 = nofpexcept VFMADD213SSr [[VRSQRTSSr]], killed %3, [[VMOVSSrm_alt]], implicit $mxcsr
+ ; CHECK: %5:fr32 = ninf nofpexcept VFMADD213SSr [[VRSQRTSSr]], killed %3, [[VMOVSSrm_alt]], implicit $mxcsr
; CHECK: [[VMOVSSrm_alt1:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.1, $noreg :: (load 4 from constant-pool)
- ; CHECK: %7:fr32 = nofpexcept VMULSSrr [[VRSQRTSSr]], [[VMOVSSrm_alt1]], implicit $mxcsr
- ; CHECK: %8:fr32 = nofpexcept VMULSSrr killed %7, killed %5, implicit $mxcsr
- ; CHECK: %9:fr32 = nofpexcept VMULSSrr [[COPY]], %8, implicit $mxcsr
- ; CHECK: %10:fr32 = nofpexcept VFMADD213SSr %8, %9, [[VMOVSSrm_alt]], implicit $mxcsr
- ; CHECK: %11:fr32 = nofpexcept VMULSSrr %9, [[VMOVSSrm_alt1]], implicit $mxcsr
- ; CHECK: %12:fr32 = nofpexcept VMULSSrr killed %11, killed %10, implicit $mxcsr
+ ; CHECK: %7:fr32 = ninf nofpexcept VMULSSrr [[VRSQRTSSr]], [[VMOVSSrm_alt1]], implicit $mxcsr
+ ; CHECK: %8:fr32 = ninf nofpexcept VMULSSrr killed %7, killed %5, implicit $mxcsr
+ ; CHECK: %9:fr32 = ninf nofpexcept VMULSSrr [[COPY]], %8, implicit $mxcsr
+ ; CHECK: %10:fr32 = ninf nofpexcept VFMADD213SSr %8, %9, [[VMOVSSrm_alt]], implicit $mxcsr
+ ; CHECK: %11:fr32 = ninf nofpexcept VMULSSrr %9, [[VMOVSSrm_alt1]], implicit $mxcsr
+ ; CHECK: %12:fr32 = ninf nofpexcept VMULSSrr killed %11, killed %10, implicit $mxcsr
; CHECK: [[COPY1:%[0-9]+]]:vr128 = COPY %12
; CHECK: [[COPY2:%[0-9]+]]:vr128 = COPY [[COPY]]
; CHECK: [[VPBROADCASTDrm:%[0-9]+]]:vr128 = VPBROADCASTDrm $rip, 1, $noreg, %const.2, $noreg :: (load 4 from constant-pool)
@@ -31,7 +44,7 @@ define float @sqrt_ieee(float %f) #0 {
; CHECK: [[COPY5:%[0-9]+]]:fr32 = COPY [[VPANDNrr]]
; CHECK: $xmm0 = COPY [[COPY5]]
; CHECK: RET 0, $xmm0
- %call = tail call float @llvm.sqrt.f32(float %f)
+ %call = tail call ninf float @llvm.sqrt.f32(float %f)
ret float %call
}
@@ -41,17 +54,30 @@ define float @sqrt_daz(float %f) #1 {
; CHECK: liveins: $xmm0
; CHECK: [[COPY:%[0-9]+]]:fr32 = COPY $xmm0
; CHECK: [[DEF:%[0-9]+]]:fr32 = IMPLICIT_DEF
+ ; CHECK: %1:fr32 = nofpexcept VSQRTSSr killed [[DEF]], [[COPY]], implicit $mxcsr
+ ; CHECK: $xmm0 = COPY %1
+ ; CHECK: RET 0, $xmm0
+ %call = tail call float @llvm.sqrt.f32(float %f)
+ ret float %call
+}
+
+define float @sqrt_daz_ninf(float %f) #1 {
+ ; CHECK-LABEL: name: sqrt_daz_ninf
+ ; CHECK: bb.0 (%ir-block.0):
+ ; CHECK: liveins: $xmm0
+ ; CHECK: [[COPY:%[0-9]+]]:fr32 = COPY $xmm0
+ ; CHECK: [[DEF:%[0-9]+]]:fr32 = IMPLICIT_DEF
; CHECK: [[VRSQRTSSr:%[0-9]+]]:fr32 = VRSQRTSSr killed [[DEF]], [[COPY]]
- ; CHECK: %3:fr32 = nofpexcept VMULSSrr [[COPY]], [[VRSQRTSSr]], implicit $mxcsr
+ ; CHECK: %3:fr32 = ninf nofpexcept VMULSSrr [[COPY]], [[VRSQRTSSr]], implicit $mxcsr
; CHECK: [[VMOVSSrm_alt:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.0, $noreg :: (load 4 from constant-pool)
- ; CHECK: %5:fr32 = nofpexcept VFMADD213SSr [[VRSQRTSSr]], killed %3, [[VMOVSSrm_alt]], implicit $mxcsr
+ ; CHECK: %5:fr32 = ninf nofpexcept VFMADD213SSr [[VRSQRTSSr]], killed %3, [[VMOVSSrm_alt]], implicit $mxcsr
; CHECK: [[VMOVSSrm_alt1:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.1, $noreg :: (load 4 from constant-pool)
- ; CHECK: %7:fr32 = nofpexcept VMULSSrr [[VRSQRTSSr]], [[VMOVSSrm_alt1]], implicit $mxcsr
- ; CHECK: %8:fr32 = nofpexcept VMULSSrr killed %7, killed %5, implicit $mxcsr
- ; CHECK: %9:fr32 = nofpexcept VMULSSrr [[COPY]], %8, implicit $mxcsr
- ; CHECK: %10:fr32 = nofpexcept VFMADD213SSr %8, %9, [[VMOVSSrm_alt]], implicit $mxcsr
- ; CHECK: %11:fr32 = nofpexcept VMULSSrr %9, [[VMOVSSrm_alt1]], implicit $mxcsr
- ; CHECK: %12:fr32 = nofpexcept VMULSSrr killed %11, killed %10, implicit $mxcsr
+ ; CHECK: %7:fr32 = ninf nofpexcept VMULSSrr [[VRSQRTSSr]], [[VMOVSSrm_alt1]], implicit $mxcsr
+ ; CHECK: %8:fr32 = ninf nofpexcept VMULSSrr killed %7, killed %5, implicit $mxcsr
+ ; CHECK: %9:fr32 = ninf nofpexcept VMULSSrr [[COPY]], %8, implicit $mxcsr
+ ; CHECK: %10:fr32 = ninf nofpexcept VFMADD213SSr %8, %9, [[VMOVSSrm_alt]], implicit $mxcsr
+ ; CHECK: %11:fr32 = ninf nofpexcept VMULSSrr %9, [[VMOVSSrm_alt1]], implicit $mxcsr
+ ; CHECK: %12:fr32 = ninf nofpexcept VMULSSrr killed %11, killed %10, implicit $mxcsr
; CHECK: [[COPY1:%[0-9]+]]:vr128 = COPY %12
; CHECK: [[FsFLD0SS:%[0-9]+]]:fr32 = FsFLD0SS
; CHECK: %15:fr32 = nofpexcept VCMPSSrr [[COPY]], killed [[FsFLD0SS]], 0, implicit $mxcsr
@@ -60,7 +86,7 @@ define float @sqrt_daz(float %f) #1 {
; CHECK: [[COPY3:%[0-9]+]]:fr32 = COPY [[VPANDNrr]]
; CHECK: $xmm0 = COPY [[COPY3]]
; CHECK: RET 0, $xmm0
- %call = tail call float @llvm.sqrt.f32(float %f)
+ %call = tail call ninf float @llvm.sqrt.f32(float %f)
ret float %call
}
diff --git a/llvm/test/CodeGen/X86/sqrt-fastmath.ll b/llvm/test/CodeGen/X86/sqrt-fastmath.ll
index f10199ce958f..b2593bc43578 100644
--- a/llvm/test/CodeGen/X86/sqrt-fastmath.ll
+++ b/llvm/test/CodeGen/X86/sqrt-fastmath.ll
@@ -59,6 +59,20 @@ define float @finite_f32_no_estimate(float %f) #0 {
define float @finite_f32_estimate_ieee(float %f) #1 {
; SSE-LABEL: finite_f32_estimate_ieee:
; SSE: # %bb.0:
+; SSE-NEXT: sqrtss %xmm0, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: finite_f32_estimate_ieee:
+; AVX: # %bb.0:
+; AVX-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %call = tail call float @__sqrtf_finite(float %f) #2
+ ret float %call
+}
+
+define float @finite_f32_estimate_ieee_ninf(float %f) #1 {
+; SSE-LABEL: finite_f32_estimate_ieee_ninf:
+; SSE: # %bb.0:
; SSE-NEXT: rsqrtss %xmm0, %xmm1
; SSE-NEXT: movaps %xmm0, %xmm2
; SSE-NEXT: mulss %xmm1, %xmm2
@@ -72,7 +86,7 @@ define float @finite_f32_estimate_ieee(float %f) #1 {
; SSE-NEXT: andnps %xmm2, %xmm0
; SSE-NEXT: retq
;
-; AVX1-LABEL: finite_f32_estimate_ieee:
+; AVX1-LABEL: finite_f32_estimate_ieee_ninf:
; AVX1: # %bb.0:
; AVX1-NEXT: vrsqrtss %xmm0, %xmm0, %xmm1
; AVX1-NEXT: vmulss %xmm1, %xmm0, %xmm2
@@ -85,7 +99,7 @@ define float @finite_f32_estimate_ieee(float %f) #1 {
; AVX1-NEXT: vandnps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
-; AVX512-LABEL: finite_f32_estimate_ieee:
+; AVX512-LABEL: finite_f32_estimate_ieee_ninf:
; AVX512: # %bb.0:
; AVX512-NEXT: vrsqrtss %xmm0, %xmm0, %xmm1
; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm2
@@ -99,13 +113,27 @@ define float @finite_f32_estimate_ieee(float %f) #1 {
; AVX512-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1}
; AVX512-NEXT: vmovaps %xmm1, %xmm0
; AVX512-NEXT: retq
- %call = tail call float @__sqrtf_finite(float %f) #2
+ %call = tail call ninf float @__sqrtf_finite(float %f) #2
ret float %call
}
define float @finite_f32_estimate_daz(float %f) #4 {
; SSE-LABEL: finite_f32_estimate_daz:
; SSE: # %bb.0:
+; SSE-NEXT: sqrtss %xmm0, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: finite_f32_estimate_daz:
+; AVX: # %bb.0:
+; AVX-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %call = tail call float @__sqrtf_finite(float %f) #2
+ ret float %call
+}
+
+define float @finite_f32_estimate_daz_ninf(float %f) #4 {
+; SSE-LABEL: finite_f32_estimate_daz_ninf:
+; SSE: # %bb.0:
; SSE-NEXT: rsqrtss %xmm0, %xmm1
; SSE-NEXT: movaps %xmm0, %xmm2
; SSE-NEXT: mulss %xmm1, %xmm2
@@ -119,7 +147,7 @@ define float @finite_f32_estimate_daz(float %f) #4 {
; SSE-NEXT: andnps %xmm2, %xmm0
; SSE-NEXT: retq
;
-; AVX1-LABEL: finite_f32_estimate_daz:
+; AVX1-LABEL: finite_f32_estimate_daz_ninf:
; AVX1: # %bb.0:
; AVX1-NEXT: vrsqrtss %xmm0, %xmm0, %xmm1
; AVX1-NEXT: vmulss %xmm1, %xmm0, %xmm2
@@ -132,7 +160,7 @@ define float @finite_f32_estimate_daz(float %f) #4 {
; AVX1-NEXT: vandnps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
-; AVX512-LABEL: finite_f32_estimate_daz:
+; AVX512-LABEL: finite_f32_estimate_daz_ninf:
; AVX512: # %bb.0:
; AVX512-NEXT: vrsqrtss %xmm0, %xmm0, %xmm1
; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm2
@@ -144,7 +172,7 @@ define float @finite_f32_estimate_daz(float %f) #4 {
; AVX512-NEXT: vmovss %xmm2, %xmm1, %xmm1 {%k1}
; AVX512-NEXT: vmovaps %xmm1, %xmm0
; AVX512-NEXT: retq
- %call = tail call float @__sqrtf_finite(float %f) #2
+ %call = tail call ninf float @__sqrtf_finite(float %f) #2
ret float %call
}
@@ -175,6 +203,20 @@ define x86_fp80 @finite_f80_estimate_but_no(x86_fp80 %ld) #1 {
define float @sqrtf_check_denorms(float %x) #3 {
; SSE-LABEL: sqrtf_check_denorms:
; SSE: # %bb.0:
+; SSE-NEXT: sqrtss %xmm0, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: sqrtf_check_denorms:
+; AVX: # %bb.0:
+; AVX-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %call = tail call float @__sqrtf_finite(float %x) #2
+ ret float %call
+}
+
+define float @sqrtf_check_denorms_ninf(float %x) #3 {
+; SSE-LABEL: sqrtf_check_denorms_ninf:
+; SSE: # %bb.0:
; SSE-NEXT: rsqrtss %xmm0, %xmm1
; SSE-NEXT: movaps %xmm0, %xmm2
; SSE-NEXT: mulss %xmm1, %xmm2
@@ -188,7 +230,7 @@ define float @sqrtf_check_denorms(float %x) #3 {
; SSE-NEXT: andnps %xmm2, %xmm0
; SSE-NEXT: retq
;
-; AVX1-LABEL: sqrtf_check_denorms:
+; AVX1-LABEL: sqrtf_check_denorms_ninf:
; AVX1: # %bb.0:
; AVX1-NEXT: vrsqrtss %xmm0, %xmm0, %xmm1
; AVX1-NEXT: vmulss %xmm1, %xmm0, %xmm2
@@ -201,7 +243,7 @@ define float @sqrtf_check_denorms(float %x) #3 {
; AVX1-NEXT: vandnps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
-; AVX512-LABEL: sqrtf_check_denorms:
+; AVX512-LABEL: sqrtf_check_denorms_ninf:
; AVX512: # %bb.0:
; AVX512-NEXT: vrsqrtss %xmm0, %xmm0, %xmm1
; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm2
@@ -215,13 +257,27 @@ define float @sqrtf_check_denorms(float %x) #3 {
; AVX512-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1}
; AVX512-NEXT: vmovaps %xmm1, %xmm0
; AVX512-NEXT: retq
- %call = tail call float @__sqrtf_finite(float %x) #2
+ %call = tail call ninf float @__sqrtf_finite(float %x) #2
ret float %call
}
define <4 x float> @sqrt_v4f32_check_denorms(<4 x float> %x) #3 {
; SSE-LABEL: sqrt_v4f32_check_denorms:
; SSE: # %bb.0:
+; SSE-NEXT: sqrtps %xmm0, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: sqrt_v4f32_check_denorms:
+; AVX: # %bb.0:
+; AVX-NEXT: vsqrtps %xmm0, %xmm0
+; AVX-NEXT: retq
+ %call = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> %x) #2
+ ret <4 x float> %call
+}
+
+define <4 x float> @sqrt_v4f32_check_denorms_ninf(<4 x float> %x) #3 {
+; SSE-LABEL: sqrt_v4f32_check_denorms_ninf:
+; SSE: # %bb.0:
; SSE-NEXT: rsqrtps %xmm0, %xmm2
; SSE-NEXT: movaps %xmm0, %xmm1
; SSE-NEXT: mulps %xmm2, %xmm1
@@ -237,7 +293,7 @@ define <4 x float> @sqrt_v4f32_check_denorms(<4 x float> %x) #3 {
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX1-LABEL: sqrt_v4f32_check_denorms:
+; AVX1-LABEL: sqrt_v4f32_check_denorms_ninf:
; AVX1: # %bb.0:
; AVX1-NEXT: vrsqrtps %xmm0, %xmm1
; AVX1-NEXT: vmulps %xmm1, %xmm0, %xmm2
@@ -251,7 +307,7 @@ define <4 x float> @sqrt_v4f32_check_denorms(<4 x float> %x) #3 {
; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
-; AVX512-LABEL: sqrt_v4f32_check_denorms:
+; AVX512-LABEL: sqrt_v4f32_check_denorms_ninf:
; AVX512: # %bb.0:
; AVX512-NEXT: vrsqrtps %xmm0, %xmm1
; AVX512-NEXT: vmulps %xmm1, %xmm0, %xmm2
@@ -266,7 +322,7 @@ define <4 x float> @sqrt_v4f32_check_denorms(<4 x float> %x) #3 {
; AVX512-NEXT: vcmpleps %xmm0, %xmm2, %xmm0
; AVX512-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
- %call = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> %x) #2
+ %call = tail call ninf <4 x float> @llvm.sqrt.v4f32(<4 x float> %x) #2
ret <4 x float> %call
}