[llvm] a4e71f0 - Assume ieee behavior without denormal-fp-math attribute
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Sat Mar 7 09:11:42 PST 2020
Author: Matt Arsenault
Date: 2020-03-07T12:10:56-05:00
New Revision: a4e71f01c08fbaeaccfe3e11cc08790432cc7e45
URL: https://github.com/llvm/llvm-project/commit/a4e71f01c08fbaeaccfe3e11cc08790432cc7e45
DIFF: https://github.com/llvm/llvm-project/commit/a4e71f01c08fbaeaccfe3e11cc08790432cc7e45.diff
LOG: Assume ieee behavior without denormal-fp-math attribute
Added:
Modified:
clang/lib/CodeGen/CGCall.cpp
clang/lib/Driver/ToolChains/Clang.cpp
clang/test/CodeGen/denormalfpmode.c
clang/test/CodeGenCUDA/flush-denormals.cu
clang/test/CodeGenCUDA/propagate-metadata.cu
clang/test/Driver/default-denormal-fp-math.c
clang/test/Driver/denormal-fp-math.c
llvm/lib/CodeGen/MachineFunction.cpp
llvm/test/CodeGen/X86/pow.ll
llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll
llvm/test/CodeGen/X86/sqrt-fastmath.ll
Removed:
################################################################################
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 42d5467c63dc..1188ea39ba2c 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -1748,11 +1748,10 @@ void CodeGenModule::ConstructDefaultFnAttrList(StringRef Name, bool HasOptnone,
if (CodeGenOpts.NullPointerIsValid)
FuncAttrs.addAttribute("null-pointer-is-valid", "true");
- // TODO: Omit attribute when the default is IEEE.
- if (CodeGenOpts.FPDenormalMode.isValid())
+ if (CodeGenOpts.FPDenormalMode != llvm::DenormalMode::getIEEE())
FuncAttrs.addAttribute("denormal-fp-math",
CodeGenOpts.FPDenormalMode.str());
- if (CodeGenOpts.FP32DenormalMode.isValid()) {
+ if (CodeGenOpts.FP32DenormalMode != CodeGenOpts.FPDenormalMode) {
FuncAttrs.addAttribute(
"denormal-fp-math-f32",
CodeGenOpts.FP32DenormalMode.str());
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index a998561a218b..3ca034e69b3b 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -2824,8 +2824,8 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
} else if (TrappingMathPresent)
CmdArgs.push_back("-fno-trapping-math");
- // TODO: Omit flag for the default IEEE instead
- if (DenormalFPMath.isValid()) {
+ // The default is IEEE.
+ if (DenormalFPMath != llvm::DenormalMode::getIEEE()) {
llvm::SmallString<64> DenormFlag;
llvm::raw_svector_ostream ArgStr(DenormFlag);
ArgStr << "-fdenormal-fp-math=" << DenormalFPMath;
diff --git a/clang/test/CodeGen/denormalfpmode.c b/clang/test/CodeGen/denormalfpmode.c
index 3b9ad0d7273b..7fd2b0b42e9d 100644
--- a/clang/test/CodeGen/denormalfpmode.c
+++ b/clang/test/CodeGen/denormalfpmode.c
@@ -3,7 +3,9 @@
// RUN: %clang_cc1 -S -fdenormal-fp-math=positive-zero %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-PZ
// CHECK-LABEL: main
-// CHECK-IEEE: attributes #0 = {{.*}}"denormal-fp-math"="ieee,ieee"{{.*}}
+
+// The ieee,ieee is the default, so omit the attribute
+// CHECK-IEEE-NOT:"denormal-fp-math"
// CHECK-PS: attributes #0 = {{.*}}"denormal-fp-math"="preserve-sign,preserve-sign"{{.*}}
// CHECK-PZ: attributes #0 = {{.*}}"denormal-fp-math"="positive-zero,positive-zero"{{.*}}
diff --git a/clang/test/CodeGenCUDA/flush-denormals.cu b/clang/test/CodeGenCUDA/flush-denormals.cu
index 275338635bc6..8577fca92866 100644
--- a/clang/test/CodeGenCUDA/flush-denormals.cu
+++ b/clang/test/CodeGenCUDA/flush-denormals.cu
@@ -39,7 +39,7 @@
extern "C" __device__ void foo() {}
// FTZ: attributes #0 = {{.*}} "denormal-fp-math-f32"="preserve-sign,preserve-sign"
-// NOFTZ: attributes #0 = {{.*}} "denormal-fp-math-f32"="ieee,ieee"
+// NOFTZ-NOT: "denormal-fp-math-f32"
// AMDNOFTZ: attributes #0 = {{.*}}+fp32-denormals{{.*}}+fp64-fp16-denormals
// AMDFTZ: attributes #0 = {{.*}}+fp64-fp16-denormals{{.*}}-fp32-denormals
diff --git a/clang/test/CodeGenCUDA/propagate-metadata.cu b/clang/test/CodeGenCUDA/propagate-metadata.cu
index 2514eeb55d20..a8cabe6b6ced 100644
--- a/clang/test/CodeGenCUDA/propagate-metadata.cu
+++ b/clang/test/CodeGenCUDA/propagate-metadata.cu
@@ -60,9 +60,9 @@ __global__ void kernel() { lib_fn(); }
// CHECK-SAME: convergent
// CHECK-SAME: norecurse
-// FTZ: "denormal-fp-math"="ieee,ieee"
+// FTZ-NOT: "denormal-fp-math"
// FTZ-SAME: "denormal-fp-math-f32"="preserve-sign,preserve-sign"
-// NOFTZ-SAME: "denormal-fp-math-f32"="ieee,ieee"
+// NOFTZ-NOT: "denormal-fp-math-f32"
// CHECK-SAME: "no-trapping-math"="true"
@@ -75,11 +75,11 @@ __global__ void kernel() { lib_fn(); }
// CHECK-SAME: convergent
// CHECK-NOT: norecurse
-// FTZ-SAME: "denormal-fp-math"="ieee,ieee"
-// NOFTZ-SAME: "denormal-fp-math"="ieee,ieee"
+// FTZ-NOT: "denormal-fp-math"
+// NOFTZ-NOT: "denormal-fp-math"
// FTZ-SAME: "denormal-fp-math-f32"="preserve-sign,preserve-sign"
-// NOFTZ-SAME: "denormal-fp-math-f32"="ieee,ieee"
+// NOFTZ-NOT: "denormal-fp-math-f32"
// CHECK-SAME: "no-trapping-math"="true"
diff --git a/clang/test/Driver/default-denormal-fp-math.c b/clang/test/Driver/default-denormal-fp-math.c
index 9cbc645345c3..5f87e151df49 100644
--- a/clang/test/Driver/default-denormal-fp-math.c
+++ b/clang/test/Driver/default-denormal-fp-math.c
@@ -14,6 +14,6 @@
// RUN: %clang -### -target x86_64-scei-ps4 -c %s -v 2>&1 | FileCheck -check-prefix=CHECK-PRESERVESIGN %s
-
-// CHECK-IEEE: -fdenormal-fp-math=ieee,ieee
+// Flag omitted for default
+// CHECK-IEEE-NOT: -fdenormal-fp-math
// CHECK-PRESERVESIGN: -fdenormal-fp-math=preserve-sign,preserve-sign
diff --git a/clang/test/Driver/denormal-fp-math.c b/clang/test/Driver/denormal-fp-math.c
index 63a2c3b7e003..ea4dc8699ecc 100644
--- a/clang/test/Driver/denormal-fp-math.c
+++ b/clang/test/Driver/denormal-fp-math.c
@@ -8,8 +8,8 @@
// RUN: not %clang -target arm-unknown-linux-gnu -c %s -fdenormal-fp-math=foo,ieee -v 2>&1 | FileCheck -check-prefix=CHECK-INVALID2 %s
// RUN: not %clang -target arm-unknown-linux-gnu -c %s -fdenormal-fp-math=foo,foo -v 2>&1 | FileCheck -check-prefix=CHECK-INVALID3 %s
-// TODO: ieee is the implied default, and the flag is not passed.
-// CHECK-IEEE: "-fdenormal-fp-math=ieee,ieee"
+// IEEE is the implied default, and the flag is not passed.
+// CHECK-IEEE-NOT: -fdenormal-fp-math=
// CHECK-PS: "-fdenormal-fp-math=preserve-sign,preserve-sign"
// CHECK-PZ: "-fdenormal-fp-math=positive-zero,positive-zero"
// CHECK-NO-UNSAFE-NOT: "-fdenormal-fp-math=ieee"
diff --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp
index 25a5f0a92c0d..d73417aa1577 100644
--- a/llvm/lib/CodeGen/MachineFunction.cpp
+++ b/llvm/lib/CodeGen/MachineFunction.cpp
@@ -284,15 +284,7 @@ DenormalMode MachineFunction::getDenormalMode(const fltSemantics &FPType) const
// TODO: Should probably avoid the connection to the IR and store directly
// in the MachineFunction.
Attribute Attr = F.getFnAttribute("denormal-fp-math");
-
- // FIXME: This should assume IEEE behavior on an unspecified
- // attribute. However, the one current user incorrectly assumes a non-IEEE
- // target by default.
- StringRef Val = Attr.getValueAsString();
- if (Val.empty())
- return DenormalMode::getInvalid();
-
- return parseDenormalFPAttribute(Val);
+ return parseDenormalFPAttribute(Attr.getValueAsString());
}
/// Should we be emitting segmented stack stuff for the function
diff --git a/llvm/test/CodeGen/X86/pow.ll b/llvm/test/CodeGen/X86/pow.ll
index 52e9ebbe852e..f3d713f5f224 100644
--- a/llvm/test/CodeGen/X86/pow.ll
+++ b/llvm/test/CodeGen/X86/pow.ll
@@ -9,8 +9,42 @@ declare <2 x double> @llvm.pow.v2f64(<2 x double>, <2 x double>)
declare x86_fp80 @llvm.pow.f80(x86_fp80, x86_fp80)
-define float @pow_f32_one_fourth_fmf(float %x) nounwind {
-; CHECK-LABEL: pow_f32_one_fourth_fmf:
+define float @pow_f32_one_fourth_fmf_ieee(float %x) nounwind {
+; CHECK-LABEL: pow_f32_one_fourth_fmf_ieee:
+; CHECK: # %bb.0:
+; CHECK-NEXT: rsqrtss %xmm0, %xmm1
+; CHECK-NEXT: movaps %xmm0, %xmm3
+; CHECK-NEXT: mulss %xmm1, %xmm3
+; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; CHECK-NEXT: movaps %xmm3, %xmm4
+; CHECK-NEXT: mulss %xmm2, %xmm4
+; CHECK-NEXT: mulss %xmm1, %xmm3
+; CHECK-NEXT: movss {{.*#+}} xmm5 = mem[0],zero,zero,zero
+; CHECK-NEXT: addss %xmm5, %xmm3
+; CHECK-NEXT: mulss %xmm4, %xmm3
+; CHECK-NEXT: movaps {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
+; CHECK-NEXT: andps %xmm1, %xmm0
+; CHECK-NEXT: movss {{.*#+}} xmm4 = mem[0],zero,zero,zero
+; CHECK-NEXT: cmpltss %xmm4, %xmm0
+; CHECK-NEXT: andnps %xmm3, %xmm0
+; CHECK-NEXT: xorps %xmm3, %xmm3
+; CHECK-NEXT: rsqrtss %xmm0, %xmm3
+; CHECK-NEXT: andps %xmm0, %xmm1
+; CHECK-NEXT: mulss %xmm3, %xmm0
+; CHECK-NEXT: mulss %xmm0, %xmm2
+; CHECK-NEXT: mulss %xmm3, %xmm0
+; CHECK-NEXT: addss %xmm5, %xmm0
+; CHECK-NEXT: mulss %xmm2, %xmm0
+; CHECK-NEXT: cmpltss %xmm4, %xmm1
+; CHECK-NEXT: andnps %xmm0, %xmm1
+; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %r = call nsz ninf afn float @llvm.pow.f32(float %x, float 2.5e-01)
+ ret float %r
+}
+
+define float @pow_f32_one_fourth_fmf_daz(float %x) #0 {
+; CHECK-LABEL: pow_f32_one_fourth_fmf_daz:
; CHECK: # %bb.0:
; CHECK-NEXT: rsqrtss %xmm0, %xmm1
; CHECK-NEXT: movaps %xmm0, %xmm2
@@ -60,21 +94,26 @@ define <4 x float> @pow_v4f32_one_fourth_fmf(<4 x float> %x) nounwind {
; CHECK-NEXT: movaps %xmm2, %xmm4
; CHECK-NEXT: mulps %xmm3, %xmm4
; CHECK-NEXT: mulps %xmm1, %xmm2
-; CHECK-NEXT: movaps {{.*#+}} xmm1 = [-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0]
-; CHECK-NEXT: addps %xmm1, %xmm2
+; CHECK-NEXT: movaps {{.*#+}} xmm5 = [-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0]
+; CHECK-NEXT: addps %xmm5, %xmm2
; CHECK-NEXT: mulps %xmm4, %xmm2
-; CHECK-NEXT: xorps %xmm4, %xmm4
-; CHECK-NEXT: cmpneqps %xmm4, %xmm0
-; CHECK-NEXT: andps %xmm2, %xmm0
-; CHECK-NEXT: rsqrtps %xmm0, %xmm2
-; CHECK-NEXT: movaps %xmm0, %xmm5
-; CHECK-NEXT: mulps %xmm2, %xmm5
-; CHECK-NEXT: mulps %xmm5, %xmm3
-; CHECK-NEXT: mulps %xmm2, %xmm5
-; CHECK-NEXT: addps %xmm1, %xmm5
-; CHECK-NEXT: mulps %xmm3, %xmm5
-; CHECK-NEXT: cmpneqps %xmm4, %xmm0
-; CHECK-NEXT: andps %xmm5, %xmm0
+; CHECK-NEXT: movaps {{.*#+}} xmm4 = [NaN,NaN,NaN,NaN]
+; CHECK-NEXT: andps %xmm4, %xmm0
+; CHECK-NEXT: movaps {{.*#+}} xmm1 = [1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38]
+; CHECK-NEXT: movaps %xmm1, %xmm6
+; CHECK-NEXT: cmpleps %xmm0, %xmm6
+; CHECK-NEXT: andps %xmm2, %xmm6
+; CHECK-NEXT: rsqrtps %xmm6, %xmm0
+; CHECK-NEXT: movaps %xmm6, %xmm2
+; CHECK-NEXT: mulps %xmm0, %xmm2
+; CHECK-NEXT: mulps %xmm2, %xmm3
+; CHECK-NEXT: mulps %xmm0, %xmm2
+; CHECK-NEXT: addps %xmm5, %xmm2
+; CHECK-NEXT: mulps %xmm3, %xmm2
+; CHECK-NEXT: andps %xmm4, %xmm6
+; CHECK-NEXT: cmpleps %xmm6, %xmm1
+; CHECK-NEXT: andps %xmm2, %xmm1
+; CHECK-NEXT: movaps %xmm1, %xmm0
; CHECK-NEXT: retq
%r = call fast <4 x float> @llvm.pow.v4f32(<4 x float> %x, <4 x float> <float 2.5e-1, float 2.5e-1, float 2.5e-01, float 2.5e-01>)
ret <4 x float> %r
@@ -228,3 +267,4 @@ define double @pow_f64_not_enough_fmf(double %x) nounwind {
ret double %r
}
+attributes #0 = { nounwind "denormal-fp-math"="ieee,preserve-sign" }
diff --git a/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll b/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll
index 152833edc28d..7be19c07da80 100644
--- a/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll
+++ b/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll
@@ -1,10 +1,42 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2,fma -stop-after=finalize-isel 2>&1 | FileCheck %s
-declare float @llvm.sqrt.f32(float) #0
+declare float @llvm.sqrt.f32(float) #2
-define float @foo(float %f) #0 {
- ; CHECK-LABEL: name: foo
+define float @sqrt_ieee(float %f) #0 {
+ ; CHECK-LABEL: name: sqrt_ieee
+ ; CHECK: bb.0 (%ir-block.0):
+ ; CHECK: liveins: $xmm0
+ ; CHECK: [[COPY:%[0-9]+]]:fr32 = COPY $xmm0
+ ; CHECK: [[DEF:%[0-9]+]]:fr32 = IMPLICIT_DEF
+ ; CHECK: [[VRSQRTSSr:%[0-9]+]]:fr32 = VRSQRTSSr killed [[DEF]], [[COPY]]
+ ; CHECK: %3:fr32 = nofpexcept VMULSSrr [[COPY]], [[VRSQRTSSr]], implicit $mxcsr
+ ; CHECK: [[VMOVSSrm_alt:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.0, $noreg :: (load 4 from constant-pool)
+ ; CHECK: %5:fr32 = nofpexcept VFMADD213SSr [[VRSQRTSSr]], killed %3, [[VMOVSSrm_alt]], implicit $mxcsr
+ ; CHECK: [[VMOVSSrm_alt1:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.1, $noreg :: (load 4 from constant-pool)
+ ; CHECK: %7:fr32 = nofpexcept VMULSSrr [[VRSQRTSSr]], [[VMOVSSrm_alt1]], implicit $mxcsr
+ ; CHECK: %8:fr32 = nofpexcept VMULSSrr killed %7, killed %5, implicit $mxcsr
+ ; CHECK: %9:fr32 = nofpexcept VMULSSrr [[COPY]], %8, implicit $mxcsr
+ ; CHECK: %10:fr32 = nofpexcept VFMADD213SSr %8, %9, [[VMOVSSrm_alt]], implicit $mxcsr
+ ; CHECK: %11:fr32 = nofpexcept VMULSSrr %9, [[VMOVSSrm_alt1]], implicit $mxcsr
+ ; CHECK: %12:fr32 = nofpexcept VMULSSrr killed %11, killed %10, implicit $mxcsr
+ ; CHECK: [[COPY1:%[0-9]+]]:vr128 = COPY %12
+ ; CHECK: [[COPY2:%[0-9]+]]:vr128 = COPY [[COPY]]
+ ; CHECK: [[VPBROADCASTDrm:%[0-9]+]]:vr128 = VPBROADCASTDrm $rip, 1, $noreg, %const.2, $noreg :: (load 4 from constant-pool)
+ ; CHECK: [[VPANDrr:%[0-9]+]]:vr128 = VPANDrr killed [[COPY2]], killed [[VPBROADCASTDrm]]
+ ; CHECK: [[COPY3:%[0-9]+]]:fr32 = COPY [[VPANDrr]]
+ ; CHECK: %18:fr32 = nofpexcept VCMPSSrm killed [[COPY3]], $rip, 1, $noreg, %const.3, $noreg, 1, implicit $mxcsr :: (load 4 from constant-pool)
+ ; CHECK: [[COPY4:%[0-9]+]]:vr128 = COPY %18
+ ; CHECK: [[VPANDNrr:%[0-9]+]]:vr128 = VPANDNrr killed [[COPY4]], killed [[COPY1]]
+ ; CHECK: [[COPY5:%[0-9]+]]:fr32 = COPY [[VPANDNrr]]
+ ; CHECK: $xmm0 = COPY [[COPY5]]
+ ; CHECK: RET 0, $xmm0
+ %call = tail call float @llvm.sqrt.f32(float %f)
+ ret float %call
+}
+
+define float @sqrt_daz(float %f) #1 {
+ ; CHECK-LABEL: name: sqrt_daz
; CHECK: bb.0 (%ir-block.0):
; CHECK: liveins: $xmm0
; CHECK: [[COPY:%[0-9]+]]:fr32 = COPY $xmm0
@@ -28,12 +60,36 @@ define float @foo(float %f) #0 {
; CHECK: [[COPY3:%[0-9]+]]:fr32 = COPY [[VPANDNrr]]
; CHECK: $xmm0 = COPY [[COPY3]]
; CHECK: RET 0, $xmm0
- %call = tail call float @llvm.sqrt.f32(float %f) #1
+ %call = tail call float @llvm.sqrt.f32(float %f)
ret float %call
}
-define float @rfoo(float %f) #0 {
- ; CHECK-LABEL: name: rfoo
+define float @rsqrt_ieee(float %f) #0 {
+ ; CHECK-LABEL: name: rsqrt_ieee
+ ; CHECK: bb.0 (%ir-block.0):
+ ; CHECK: liveins: $xmm0
+ ; CHECK: [[COPY:%[0-9]+]]:fr32 = COPY $xmm0
+ ; CHECK: [[DEF:%[0-9]+]]:fr32 = IMPLICIT_DEF
+ ; CHECK: [[VRSQRTSSr:%[0-9]+]]:fr32 = VRSQRTSSr killed [[DEF]], [[COPY]]
+ ; CHECK: %3:fr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept VMULSSrr [[COPY]], [[VRSQRTSSr]], implicit $mxcsr
+ ; CHECK: [[VMOVSSrm_alt:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.0, $noreg :: (load 4 from constant-pool)
+ ; CHECK: %5:fr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept VFMADD213SSr [[VRSQRTSSr]], killed %3, [[VMOVSSrm_alt]], implicit $mxcsr
+ ; CHECK: [[VMOVSSrm_alt1:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.1, $noreg :: (load 4 from constant-pool)
+ ; CHECK: %7:fr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept VMULSSrr [[VRSQRTSSr]], [[VMOVSSrm_alt1]], implicit $mxcsr
+ ; CHECK: %8:fr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept VMULSSrr killed %7, killed %5, implicit $mxcsr
+ ; CHECK: %9:fr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept VMULSSrr [[COPY]], %8, implicit $mxcsr
+ ; CHECK: %10:fr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept VFMADD213SSr %8, killed %9, [[VMOVSSrm_alt]], implicit $mxcsr
+ ; CHECK: %11:fr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept VMULSSrr %8, [[VMOVSSrm_alt1]], implicit $mxcsr
+ ; CHECK: %12:fr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept VMULSSrr killed %11, killed %10, implicit $mxcsr
+ ; CHECK: $xmm0 = COPY %12
+ ; CHECK: RET 0, $xmm0
+ %sqrt = tail call float @llvm.sqrt.f32(float %f)
+ %div = fdiv fast float 1.0, %sqrt
+ ret float %div
+}
+
+define float @rsqrt_daz(float %f) #1 {
+ ; CHECK-LABEL: name: rsqrt_daz
; CHECK: bb.0 (%ir-block.0):
; CHECK: liveins: $xmm0
; CHECK: [[COPY:%[0-9]+]]:fr32 = COPY $xmm0
@@ -56,5 +112,6 @@ define float @rfoo(float %f) #0 {
ret float %div
}
-attributes #0 = { "unsafe-fp-math"="true" "reciprocal-estimates"="sqrt:2" }
-attributes #1 = { nounwind readnone }
+attributes #0 = { "unsafe-fp-math"="true" "reciprocal-estimates"="sqrt:2" "denormal-fp-math"="ieee,ieee" }
+attributes #1 = { "unsafe-fp-math"="true" "reciprocal-estimates"="sqrt:2" "denormal-fp-math"="ieee,preserve-sign" }
+attributes #2 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/X86/sqrt-fastmath.ll b/llvm/test/CodeGen/X86/sqrt-fastmath.ll
index 3986c8f863d7..f10199ce958f 100644
--- a/llvm/test/CodeGen/X86/sqrt-fastmath.ll
+++ b/llvm/test/CodeGen/X86/sqrt-fastmath.ll
@@ -56,8 +56,55 @@ define float @finite_f32_no_estimate(float %f) #0 {
ret float %call
}
-define float @finite_f32_estimate(float %f) #1 {
-; SSE-LABEL: finite_f32_estimate:
+define float @finite_f32_estimate_ieee(float %f) #1 {
+; SSE-LABEL: finite_f32_estimate_ieee:
+; SSE: # %bb.0:
+; SSE-NEXT: rsqrtss %xmm0, %xmm1
+; SSE-NEXT: movaps %xmm0, %xmm2
+; SSE-NEXT: mulss %xmm1, %xmm2
+; SSE-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; SSE-NEXT: mulss %xmm2, %xmm3
+; SSE-NEXT: mulss %xmm1, %xmm2
+; SSE-NEXT: addss {{.*}}(%rip), %xmm2
+; SSE-NEXT: andps {{.*}}(%rip), %xmm0
+; SSE-NEXT: mulss %xmm3, %xmm2
+; SSE-NEXT: cmpltss {{.*}}(%rip), %xmm0
+; SSE-NEXT: andnps %xmm2, %xmm0
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: finite_f32_estimate_ieee:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vrsqrtss %xmm0, %xmm0, %xmm1
+; AVX1-NEXT: vmulss %xmm1, %xmm0, %xmm2
+; AVX1-NEXT: vmulss %xmm1, %xmm2, %xmm1
+; AVX1-NEXT: vaddss {{.*}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vmulss {{.*}}(%rip), %xmm2, %xmm2
+; AVX1-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vmulss %xmm1, %xmm2, %xmm1
+; AVX1-NEXT: vcmpltss {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vandnps %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX512-LABEL: finite_f32_estimate_ieee:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vrsqrtss %xmm0, %xmm0, %xmm1
+; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm2
+; AVX512-NEXT: vfmadd213ss {{.*#+}} xmm1 = (xmm2 * xmm1) + mem
+; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm2, %xmm2
+; AVX512-NEXT: vmulss %xmm1, %xmm2, %xmm1
+; AVX512-NEXT: vbroadcastss {{.*#+}} xmm2 = [NaN,NaN,NaN,NaN]
+; AVX512-NEXT: vandps %xmm2, %xmm0, %xmm0
+; AVX512-NEXT: vcmpltss {{.*}}(%rip), %xmm0, %k1
+; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1}
+; AVX512-NEXT: vmovaps %xmm1, %xmm0
+; AVX512-NEXT: retq
+ %call = tail call float @__sqrtf_finite(float %f) #2
+ ret float %call
+}
+
+define float @finite_f32_estimate_daz(float %f) #4 {
+; SSE-LABEL: finite_f32_estimate_daz:
; SSE: # %bb.0:
; SSE-NEXT: rsqrtss %xmm0, %xmm1
; SSE-NEXT: movaps %xmm0, %xmm2
@@ -72,7 +119,7 @@ define float @finite_f32_estimate(float %f) #1 {
; SSE-NEXT: andnps %xmm2, %xmm0
; SSE-NEXT: retq
;
-; AVX1-LABEL: finite_f32_estimate:
+; AVX1-LABEL: finite_f32_estimate_daz:
; AVX1: # %bb.0:
; AVX1-NEXT: vrsqrtss %xmm0, %xmm0, %xmm1
; AVX1-NEXT: vmulss %xmm1, %xmm0, %xmm2
@@ -85,7 +132,7 @@ define float @finite_f32_estimate(float %f) #1 {
; AVX1-NEXT: vandnps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
-; AVX512-LABEL: finite_f32_estimate:
+; AVX512-LABEL: finite_f32_estimate_daz:
; AVX512: # %bb.0:
; AVX512-NEXT: vrsqrtss %xmm0, %xmm0, %xmm1
; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm2
@@ -516,4 +563,4 @@ attributes #0 = { "unsafe-fp-math"="true" "reciprocal-estimates"="!sqrtf,!vec-sq
attributes #1 = { "unsafe-fp-math"="true" "reciprocal-estimates"="sqrt,vec-sqrt" }
attributes #2 = { nounwind readnone }
attributes #3 = { "unsafe-fp-math"="true" "reciprocal-estimates"="sqrt,vec-sqrt" "denormal-fp-math"="ieee" }
-
+attributes #4 = { "unsafe-fp-math"="true" "reciprocal-estimates"="sqrt,vec-sqrt" "denormal-fp-math"="ieee,preserve-sign" }
More information about the llvm-commits mailing list