[llvm] a4e71f0 - Assume ieee behavior without denormal-fp-math attribute

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Sat Mar 7 09:11:42 PST 2020


Author: Matt Arsenault
Date: 2020-03-07T12:10:56-05:00
New Revision: a4e71f01c08fbaeaccfe3e11cc08790432cc7e45

URL: https://github.com/llvm/llvm-project/commit/a4e71f01c08fbaeaccfe3e11cc08790432cc7e45
DIFF: https://github.com/llvm/llvm-project/commit/a4e71f01c08fbaeaccfe3e11cc08790432cc7e45.diff

LOG: Assume ieee behavior without denormal-fp-math attribute

Added: 
    

Modified: 
    clang/lib/CodeGen/CGCall.cpp
    clang/lib/Driver/ToolChains/Clang.cpp
    clang/test/CodeGen/denormalfpmode.c
    clang/test/CodeGenCUDA/flush-denormals.cu
    clang/test/CodeGenCUDA/propagate-metadata.cu
    clang/test/Driver/default-denormal-fp-math.c
    clang/test/Driver/denormal-fp-math.c
    llvm/lib/CodeGen/MachineFunction.cpp
    llvm/test/CodeGen/X86/pow.ll
    llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll
    llvm/test/CodeGen/X86/sqrt-fastmath.ll

Removed: 
    


################################################################################
diff  --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 42d5467c63dc..1188ea39ba2c 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -1748,11 +1748,10 @@ void CodeGenModule::ConstructDefaultFnAttrList(StringRef Name, bool HasOptnone,
     if (CodeGenOpts.NullPointerIsValid)
       FuncAttrs.addAttribute("null-pointer-is-valid", "true");
 
-    // TODO: Omit attribute when the default is IEEE.
-    if (CodeGenOpts.FPDenormalMode.isValid())
+    if (CodeGenOpts.FPDenormalMode != llvm::DenormalMode::getIEEE())
       FuncAttrs.addAttribute("denormal-fp-math",
                              CodeGenOpts.FPDenormalMode.str());
-    if (CodeGenOpts.FP32DenormalMode.isValid()) {
+    if (CodeGenOpts.FP32DenormalMode != CodeGenOpts.FPDenormalMode) {
       FuncAttrs.addAttribute(
           "denormal-fp-math-f32",
           CodeGenOpts.FP32DenormalMode.str());

diff  --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index a998561a218b..3ca034e69b3b 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -2824,8 +2824,8 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
   } else if (TrappingMathPresent)
     CmdArgs.push_back("-fno-trapping-math");
 
-  // TODO: Omit flag for the default IEEE instead
-  if (DenormalFPMath.isValid()) {
+  // The default is IEEE.
+  if (DenormalFPMath != llvm::DenormalMode::getIEEE()) {
     llvm::SmallString<64> DenormFlag;
     llvm::raw_svector_ostream ArgStr(DenormFlag);
     ArgStr << "-fdenormal-fp-math=" << DenormalFPMath;

diff  --git a/clang/test/CodeGen/denormalfpmode.c b/clang/test/CodeGen/denormalfpmode.c
index 3b9ad0d7273b..7fd2b0b42e9d 100644
--- a/clang/test/CodeGen/denormalfpmode.c
+++ b/clang/test/CodeGen/denormalfpmode.c
@@ -3,7 +3,9 @@
 // RUN: %clang_cc1 -S -fdenormal-fp-math=positive-zero %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-PZ
 
 // CHECK-LABEL: main
-// CHECK-IEEE: attributes #0 = {{.*}}"denormal-fp-math"="ieee,ieee"{{.*}}
+
+// ieee,ieee is the default, so the attribute is omitted.
+// CHECK-IEEE-NOT:"denormal-fp-math"
 // CHECK-PS: attributes #0 = {{.*}}"denormal-fp-math"="preserve-sign,preserve-sign"{{.*}}
 // CHECK-PZ: attributes #0 = {{.*}}"denormal-fp-math"="positive-zero,positive-zero"{{.*}}
 

diff  --git a/clang/test/CodeGenCUDA/flush-denormals.cu b/clang/test/CodeGenCUDA/flush-denormals.cu
index 275338635bc6..8577fca92866 100644
--- a/clang/test/CodeGenCUDA/flush-denormals.cu
+++ b/clang/test/CodeGenCUDA/flush-denormals.cu
@@ -39,7 +39,7 @@
 extern "C" __device__ void foo() {}
 
 // FTZ: attributes #0 = {{.*}} "denormal-fp-math-f32"="preserve-sign,preserve-sign"
-// NOFTZ: attributes #0 = {{.*}} "denormal-fp-math-f32"="ieee,ieee"
+// NOFTZ-NOT: "denormal-fp-math-f32"
 
 // AMDNOFTZ: attributes #0 = {{.*}}+fp32-denormals{{.*}}+fp64-fp16-denormals
 // AMDFTZ: attributes #0 = {{.*}}+fp64-fp16-denormals{{.*}}-fp32-denormals

diff  --git a/clang/test/CodeGenCUDA/propagate-metadata.cu b/clang/test/CodeGenCUDA/propagate-metadata.cu
index 2514eeb55d20..a8cabe6b6ced 100644
--- a/clang/test/CodeGenCUDA/propagate-metadata.cu
+++ b/clang/test/CodeGenCUDA/propagate-metadata.cu
@@ -60,9 +60,9 @@ __global__ void kernel() { lib_fn(); }
 // CHECK-SAME: convergent
 // CHECK-SAME: norecurse
 
-// FTZ: "denormal-fp-math"="ieee,ieee"
+// FTZ-NOT: "denormal-fp-math"
 // FTZ-SAME: "denormal-fp-math-f32"="preserve-sign,preserve-sign"
-// NOFTZ-SAME: "denormal-fp-math-f32"="ieee,ieee"
+// NOFTZ-NOT: "denormal-fp-math-f32"
 
 // CHECK-SAME: "no-trapping-math"="true"
 
@@ -75,11 +75,11 @@ __global__ void kernel() { lib_fn(); }
 // CHECK-SAME: convergent
 // CHECK-NOT: norecurse
 
-// FTZ-SAME: "denormal-fp-math"="ieee,ieee"
-// NOFTZ-SAME: "denormal-fp-math"="ieee,ieee"
+// FTZ-NOT: "denormal-fp-math"
+// NOFTZ-NOT: "denormal-fp-math"
 
 // FTZ-SAME: "denormal-fp-math-f32"="preserve-sign,preserve-sign"
-// NOFTZ-SAME: "denormal-fp-math-f32"="ieee,ieee"
+// NOFTZ-NOT: "denormal-fp-math-f32"
 
 // CHECK-SAME: "no-trapping-math"="true"
 

diff  --git a/clang/test/Driver/default-denormal-fp-math.c b/clang/test/Driver/default-denormal-fp-math.c
index 9cbc645345c3..5f87e151df49 100644
--- a/clang/test/Driver/default-denormal-fp-math.c
+++ b/clang/test/Driver/default-denormal-fp-math.c
@@ -14,6 +14,6 @@
 
 // RUN: %clang -### -target x86_64-scei-ps4 -c %s -v 2>&1 | FileCheck -check-prefix=CHECK-PRESERVESIGN %s
 
-
-// CHECK-IEEE: -fdenormal-fp-math=ieee,ieee
+// Flag omitted for default
+// CHECK-IEEE-NOT: -fdenormal-fp-math
 // CHECK-PRESERVESIGN: -fdenormal-fp-math=preserve-sign,preserve-sign

diff  --git a/clang/test/Driver/denormal-fp-math.c b/clang/test/Driver/denormal-fp-math.c
index 63a2c3b7e003..ea4dc8699ecc 100644
--- a/clang/test/Driver/denormal-fp-math.c
+++ b/clang/test/Driver/denormal-fp-math.c
@@ -8,8 +8,8 @@
 // RUN: not %clang -target arm-unknown-linux-gnu -c %s -fdenormal-fp-math=foo,ieee -v 2>&1 | FileCheck -check-prefix=CHECK-INVALID2 %s
 // RUN: not %clang -target arm-unknown-linux-gnu -c %s -fdenormal-fp-math=foo,foo -v 2>&1 | FileCheck -check-prefix=CHECK-INVALID3 %s
 
-// TODO: ieee is the implied default, and the flag is not passed.
-// CHECK-IEEE: "-fdenormal-fp-math=ieee,ieee"
+// IEEE is the implied default, and the flag is not passed.
+// CHECK-IEEE-NOT: -fdenormal-fp-math=
 // CHECK-PS: "-fdenormal-fp-math=preserve-sign,preserve-sign"
 // CHECK-PZ: "-fdenormal-fp-math=positive-zero,positive-zero"
 // CHECK-NO-UNSAFE-NOT: "-fdenormal-fp-math=ieee"

diff  --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp
index 25a5f0a92c0d..d73417aa1577 100644
--- a/llvm/lib/CodeGen/MachineFunction.cpp
+++ b/llvm/lib/CodeGen/MachineFunction.cpp
@@ -284,15 +284,7 @@ DenormalMode MachineFunction::getDenormalMode(const fltSemantics &FPType) const
   // TODO: Should probably avoid the connection to the IR and store directly
   // in the MachineFunction.
   Attribute Attr = F.getFnAttribute("denormal-fp-math");
-
-  // FIXME: This should assume IEEE behavior on an unspecified
-  // attribute. However, the one current user incorrectly assumes a non-IEEE
-  // target by default.
-  StringRef Val = Attr.getValueAsString();
-  if (Val.empty())
-    return DenormalMode::getInvalid();
-
-  return parseDenormalFPAttribute(Val);
+  return parseDenormalFPAttribute(Attr.getValueAsString());
 }
 
 /// Should we be emitting segmented stack stuff for the function

diff  --git a/llvm/test/CodeGen/X86/pow.ll b/llvm/test/CodeGen/X86/pow.ll
index 52e9ebbe852e..f3d713f5f224 100644
--- a/llvm/test/CodeGen/X86/pow.ll
+++ b/llvm/test/CodeGen/X86/pow.ll
@@ -9,8 +9,42 @@ declare <2 x double> @llvm.pow.v2f64(<2 x double>, <2 x double>)
 
 declare x86_fp80 @llvm.pow.f80(x86_fp80, x86_fp80)
 
-define float @pow_f32_one_fourth_fmf(float %x) nounwind {
-; CHECK-LABEL: pow_f32_one_fourth_fmf:
+define float @pow_f32_one_fourth_fmf_ieee(float %x) nounwind {
+; CHECK-LABEL: pow_f32_one_fourth_fmf_ieee:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    rsqrtss %xmm0, %xmm1
+; CHECK-NEXT:    movaps %xmm0, %xmm3
+; CHECK-NEXT:    mulss %xmm1, %xmm3
+; CHECK-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; CHECK-NEXT:    movaps %xmm3, %xmm4
+; CHECK-NEXT:    mulss %xmm2, %xmm4
+; CHECK-NEXT:    mulss %xmm1, %xmm3
+; CHECK-NEXT:    movss {{.*#+}} xmm5 = mem[0],zero,zero,zero
+; CHECK-NEXT:    addss %xmm5, %xmm3
+; CHECK-NEXT:    mulss %xmm4, %xmm3
+; CHECK-NEXT:    movaps {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
+; CHECK-NEXT:    andps %xmm1, %xmm0
+; CHECK-NEXT:    movss {{.*#+}} xmm4 = mem[0],zero,zero,zero
+; CHECK-NEXT:    cmpltss %xmm4, %xmm0
+; CHECK-NEXT:    andnps %xmm3, %xmm0
+; CHECK-NEXT:    xorps %xmm3, %xmm3
+; CHECK-NEXT:    rsqrtss %xmm0, %xmm3
+; CHECK-NEXT:    andps %xmm0, %xmm1
+; CHECK-NEXT:    mulss %xmm3, %xmm0
+; CHECK-NEXT:    mulss %xmm0, %xmm2
+; CHECK-NEXT:    mulss %xmm3, %xmm0
+; CHECK-NEXT:    addss %xmm5, %xmm0
+; CHECK-NEXT:    mulss %xmm2, %xmm0
+; CHECK-NEXT:    cmpltss %xmm4, %xmm1
+; CHECK-NEXT:    andnps %xmm0, %xmm1
+; CHECK-NEXT:    movaps %xmm1, %xmm0
+; CHECK-NEXT:    retq
+  %r = call nsz ninf afn float @llvm.pow.f32(float %x, float 2.5e-01)
+  ret float %r
+}
+
+define float @pow_f32_one_fourth_fmf_daz(float %x) #0 {
+; CHECK-LABEL: pow_f32_one_fourth_fmf_daz:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    rsqrtss %xmm0, %xmm1
 ; CHECK-NEXT:    movaps %xmm0, %xmm2
@@ -60,21 +94,26 @@ define <4 x float> @pow_v4f32_one_fourth_fmf(<4 x float> %x) nounwind {
 ; CHECK-NEXT:    movaps %xmm2, %xmm4
 ; CHECK-NEXT:    mulps %xmm3, %xmm4
 ; CHECK-NEXT:    mulps %xmm1, %xmm2
-; CHECK-NEXT:    movaps {{.*#+}} xmm1 = [-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0]
-; CHECK-NEXT:    addps %xmm1, %xmm2
+; CHECK-NEXT:    movaps {{.*#+}} xmm5 = [-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0]
+; CHECK-NEXT:    addps %xmm5, %xmm2
 ; CHECK-NEXT:    mulps %xmm4, %xmm2
-; CHECK-NEXT:    xorps %xmm4, %xmm4
-; CHECK-NEXT:    cmpneqps %xmm4, %xmm0
-; CHECK-NEXT:    andps %xmm2, %xmm0
-; CHECK-NEXT:    rsqrtps %xmm0, %xmm2
-; CHECK-NEXT:    movaps %xmm0, %xmm5
-; CHECK-NEXT:    mulps %xmm2, %xmm5
-; CHECK-NEXT:    mulps %xmm5, %xmm3
-; CHECK-NEXT:    mulps %xmm2, %xmm5
-; CHECK-NEXT:    addps %xmm1, %xmm5
-; CHECK-NEXT:    mulps %xmm3, %xmm5
-; CHECK-NEXT:    cmpneqps %xmm4, %xmm0
-; CHECK-NEXT:    andps %xmm5, %xmm0
+; CHECK-NEXT:    movaps {{.*#+}} xmm4 = [NaN,NaN,NaN,NaN]
+; CHECK-NEXT:    andps %xmm4, %xmm0
+; CHECK-NEXT:    movaps {{.*#+}} xmm1 = [1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38]
+; CHECK-NEXT:    movaps %xmm1, %xmm6
+; CHECK-NEXT:    cmpleps %xmm0, %xmm6
+; CHECK-NEXT:    andps %xmm2, %xmm6
+; CHECK-NEXT:    rsqrtps %xmm6, %xmm0
+; CHECK-NEXT:    movaps %xmm6, %xmm2
+; CHECK-NEXT:    mulps %xmm0, %xmm2
+; CHECK-NEXT:    mulps %xmm2, %xmm3
+; CHECK-NEXT:    mulps %xmm0, %xmm2
+; CHECK-NEXT:    addps %xmm5, %xmm2
+; CHECK-NEXT:    mulps %xmm3, %xmm2
+; CHECK-NEXT:    andps %xmm4, %xmm6
+; CHECK-NEXT:    cmpleps %xmm6, %xmm1
+; CHECK-NEXT:    andps %xmm2, %xmm1
+; CHECK-NEXT:    movaps %xmm1, %xmm0
 ; CHECK-NEXT:    retq
   %r = call fast <4 x float> @llvm.pow.v4f32(<4 x float> %x, <4 x float> <float 2.5e-1, float 2.5e-1, float 2.5e-01, float 2.5e-01>)
   ret <4 x float> %r
@@ -228,3 +267,4 @@ define double @pow_f64_not_enough_fmf(double %x) nounwind {
   ret double %r
 }
 
+attributes #0 = { nounwind "denormal-fp-math"="ieee,preserve-sign" }

diff  --git a/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll b/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll
index 152833edc28d..7be19c07da80 100644
--- a/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll
+++ b/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll
@@ -1,10 +1,42 @@
 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2,fma -stop-after=finalize-isel 2>&1 | FileCheck %s
 
-declare float @llvm.sqrt.f32(float) #0
+declare float @llvm.sqrt.f32(float) #2
 
-define float @foo(float %f) #0 {
-  ; CHECK-LABEL: name: foo
+define float @sqrt_ieee(float %f) #0 {
+  ; CHECK-LABEL: name: sqrt_ieee
+  ; CHECK: bb.0 (%ir-block.0):
+  ; CHECK:   liveins: $xmm0
+  ; CHECK:   [[COPY:%[0-9]+]]:fr32 = COPY $xmm0
+  ; CHECK:   [[DEF:%[0-9]+]]:fr32 = IMPLICIT_DEF
+  ; CHECK:   [[VRSQRTSSr:%[0-9]+]]:fr32 = VRSQRTSSr killed [[DEF]], [[COPY]]
+  ; CHECK:   %3:fr32 = nofpexcept VMULSSrr [[COPY]], [[VRSQRTSSr]], implicit $mxcsr
+  ; CHECK:   [[VMOVSSrm_alt:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.0, $noreg :: (load 4 from constant-pool)
+  ; CHECK:   %5:fr32 = nofpexcept VFMADD213SSr [[VRSQRTSSr]], killed %3, [[VMOVSSrm_alt]], implicit $mxcsr
+  ; CHECK:   [[VMOVSSrm_alt1:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.1, $noreg :: (load 4 from constant-pool)
+  ; CHECK:   %7:fr32 = nofpexcept VMULSSrr [[VRSQRTSSr]], [[VMOVSSrm_alt1]], implicit $mxcsr
+  ; CHECK:   %8:fr32 = nofpexcept VMULSSrr killed %7, killed %5, implicit $mxcsr
+  ; CHECK:   %9:fr32 = nofpexcept VMULSSrr [[COPY]], %8, implicit $mxcsr
+  ; CHECK:   %10:fr32 = nofpexcept VFMADD213SSr %8, %9, [[VMOVSSrm_alt]], implicit $mxcsr
+  ; CHECK:   %11:fr32 = nofpexcept VMULSSrr %9, [[VMOVSSrm_alt1]], implicit $mxcsr
+  ; CHECK:   %12:fr32 = nofpexcept VMULSSrr killed %11, killed %10, implicit $mxcsr
+  ; CHECK:   [[COPY1:%[0-9]+]]:vr128 = COPY %12
+  ; CHECK:   [[COPY2:%[0-9]+]]:vr128 = COPY [[COPY]]
+  ; CHECK:   [[VPBROADCASTDrm:%[0-9]+]]:vr128 = VPBROADCASTDrm $rip, 1, $noreg, %const.2, $noreg :: (load 4 from constant-pool)
+  ; CHECK:   [[VPANDrr:%[0-9]+]]:vr128 = VPANDrr killed [[COPY2]], killed [[VPBROADCASTDrm]]
+  ; CHECK:   [[COPY3:%[0-9]+]]:fr32 = COPY [[VPANDrr]]
+  ; CHECK:   %18:fr32 = nofpexcept VCMPSSrm killed [[COPY3]], $rip, 1, $noreg, %const.3, $noreg, 1, implicit $mxcsr :: (load 4 from constant-pool)
+  ; CHECK:   [[COPY4:%[0-9]+]]:vr128 = COPY %18
+  ; CHECK:   [[VPANDNrr:%[0-9]+]]:vr128 = VPANDNrr killed [[COPY4]], killed [[COPY1]]
+  ; CHECK:   [[COPY5:%[0-9]+]]:fr32 = COPY [[VPANDNrr]]
+  ; CHECK:   $xmm0 = COPY [[COPY5]]
+  ; CHECK:   RET 0, $xmm0
+  %call = tail call float @llvm.sqrt.f32(float %f)
+  ret float %call
+}
+
+define float @sqrt_daz(float %f) #1 {
+  ; CHECK-LABEL: name: sqrt_daz
   ; CHECK: bb.0 (%ir-block.0):
   ; CHECK:   liveins: $xmm0
   ; CHECK:   [[COPY:%[0-9]+]]:fr32 = COPY $xmm0
@@ -28,12 +60,36 @@ define float @foo(float %f) #0 {
   ; CHECK:   [[COPY3:%[0-9]+]]:fr32 = COPY [[VPANDNrr]]
   ; CHECK:   $xmm0 = COPY [[COPY3]]
   ; CHECK:   RET 0, $xmm0
-  %call = tail call float @llvm.sqrt.f32(float %f) #1
+  %call = tail call float @llvm.sqrt.f32(float %f)
   ret float %call
 }
 
-define float @rfoo(float %f) #0 {
-  ; CHECK-LABEL: name: rfoo
+define float @rsqrt_ieee(float %f) #0 {
+  ; CHECK-LABEL: name: rsqrt_ieee
+  ; CHECK: bb.0 (%ir-block.0):
+  ; CHECK:   liveins: $xmm0
+  ; CHECK:   [[COPY:%[0-9]+]]:fr32 = COPY $xmm0
+  ; CHECK:   [[DEF:%[0-9]+]]:fr32 = IMPLICIT_DEF
+  ; CHECK:   [[VRSQRTSSr:%[0-9]+]]:fr32 = VRSQRTSSr killed [[DEF]], [[COPY]]
+  ; CHECK:   %3:fr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept VMULSSrr [[COPY]], [[VRSQRTSSr]], implicit $mxcsr
+  ; CHECK:   [[VMOVSSrm_alt:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.0, $noreg :: (load 4 from constant-pool)
+  ; CHECK:   %5:fr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept VFMADD213SSr [[VRSQRTSSr]], killed %3, [[VMOVSSrm_alt]], implicit $mxcsr
+  ; CHECK:   [[VMOVSSrm_alt1:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.1, $noreg :: (load 4 from constant-pool)
+  ; CHECK:   %7:fr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept VMULSSrr [[VRSQRTSSr]], [[VMOVSSrm_alt1]], implicit $mxcsr
+  ; CHECK:   %8:fr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept VMULSSrr killed %7, killed %5, implicit $mxcsr
+  ; CHECK:   %9:fr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept VMULSSrr [[COPY]], %8, implicit $mxcsr
+  ; CHECK:   %10:fr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept VFMADD213SSr %8, killed %9, [[VMOVSSrm_alt]], implicit $mxcsr
+  ; CHECK:   %11:fr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept VMULSSrr %8, [[VMOVSSrm_alt1]], implicit $mxcsr
+  ; CHECK:   %12:fr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept VMULSSrr killed %11, killed %10, implicit $mxcsr
+  ; CHECK:   $xmm0 = COPY %12
+  ; CHECK:   RET 0, $xmm0
+  %sqrt = tail call float @llvm.sqrt.f32(float %f)
+  %div = fdiv fast float 1.0, %sqrt
+  ret float %div
+}
+
+define float @rsqrt_daz(float %f) #1 {
+  ; CHECK-LABEL: name: rsqrt_daz
   ; CHECK: bb.0 (%ir-block.0):
   ; CHECK:   liveins: $xmm0
   ; CHECK:   [[COPY:%[0-9]+]]:fr32 = COPY $xmm0
@@ -56,5 +112,6 @@ define float @rfoo(float %f) #0 {
   ret float %div
 }
 
-attributes #0 = { "unsafe-fp-math"="true" "reciprocal-estimates"="sqrt:2" }
-attributes #1 = { nounwind readnone }
+attributes #0 = { "unsafe-fp-math"="true" "reciprocal-estimates"="sqrt:2" "denormal-fp-math"="ieee,ieee" }
+attributes #1 = { "unsafe-fp-math"="true" "reciprocal-estimates"="sqrt:2" "denormal-fp-math"="ieee,preserve-sign" }
+attributes #2 = { nounwind readnone }

diff  --git a/llvm/test/CodeGen/X86/sqrt-fastmath.ll b/llvm/test/CodeGen/X86/sqrt-fastmath.ll
index 3986c8f863d7..f10199ce958f 100644
--- a/llvm/test/CodeGen/X86/sqrt-fastmath.ll
+++ b/llvm/test/CodeGen/X86/sqrt-fastmath.ll
@@ -56,8 +56,55 @@ define float @finite_f32_no_estimate(float %f) #0 {
   ret float %call
 }
 
-define float @finite_f32_estimate(float %f) #1 {
-; SSE-LABEL: finite_f32_estimate:
+define float @finite_f32_estimate_ieee(float %f) #1 {
+; SSE-LABEL: finite_f32_estimate_ieee:
+; SSE:       # %bb.0:
+; SSE-NEXT:    rsqrtss %xmm0, %xmm1
+; SSE-NEXT:    movaps %xmm0, %xmm2
+; SSE-NEXT:    mulss %xmm1, %xmm2
+; SSE-NEXT:    movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; SSE-NEXT:    mulss %xmm2, %xmm3
+; SSE-NEXT:    mulss %xmm1, %xmm2
+; SSE-NEXT:    addss {{.*}}(%rip), %xmm2
+; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
+; SSE-NEXT:    mulss %xmm3, %xmm2
+; SSE-NEXT:    cmpltss {{.*}}(%rip), %xmm0
+; SSE-NEXT:    andnps %xmm2, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: finite_f32_estimate_ieee:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vrsqrtss %xmm0, %xmm0, %xmm1
+; AVX1-NEXT:    vmulss %xmm1, %xmm0, %xmm2
+; AVX1-NEXT:    vmulss %xmm1, %xmm2, %xmm1
+; AVX1-NEXT:    vaddss {{.*}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT:    vmulss {{.*}}(%rip), %xmm2, %xmm2
+; AVX1-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT:    vmulss %xmm1, %xmm2, %xmm1
+; AVX1-NEXT:    vcmpltss {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT:    vandnps %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    retq
+;
+; AVX512-LABEL: finite_f32_estimate_ieee:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vrsqrtss %xmm0, %xmm0, %xmm1
+; AVX512-NEXT:    vmulss %xmm1, %xmm0, %xmm2
+; AVX512-NEXT:    vfmadd213ss {{.*#+}} xmm1 = (xmm2 * xmm1) + mem
+; AVX512-NEXT:    vmulss {{.*}}(%rip), %xmm2, %xmm2
+; AVX512-NEXT:    vmulss %xmm1, %xmm2, %xmm1
+; AVX512-NEXT:    vbroadcastss {{.*#+}} xmm2 = [NaN,NaN,NaN,NaN]
+; AVX512-NEXT:    vandps %xmm2, %xmm0, %xmm0
+; AVX512-NEXT:    vcmpltss {{.*}}(%rip), %xmm0, %k1
+; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; AVX512-NEXT:    vmovss %xmm0, %xmm1, %xmm1 {%k1}
+; AVX512-NEXT:    vmovaps %xmm1, %xmm0
+; AVX512-NEXT:    retq
+  %call = tail call float @__sqrtf_finite(float %f) #2
+  ret float %call
+}
+
+define float @finite_f32_estimate_daz(float %f) #4 {
+; SSE-LABEL: finite_f32_estimate_daz:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    rsqrtss %xmm0, %xmm1
 ; SSE-NEXT:    movaps %xmm0, %xmm2
@@ -72,7 +119,7 @@ define float @finite_f32_estimate(float %f) #1 {
 ; SSE-NEXT:    andnps %xmm2, %xmm0
 ; SSE-NEXT:    retq
 ;
-; AVX1-LABEL: finite_f32_estimate:
+; AVX1-LABEL: finite_f32_estimate_daz:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vrsqrtss %xmm0, %xmm0, %xmm1
 ; AVX1-NEXT:    vmulss %xmm1, %xmm0, %xmm2
@@ -85,7 +132,7 @@ define float @finite_f32_estimate(float %f) #1 {
 ; AVX1-NEXT:    vandnps %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    retq
 ;
-; AVX512-LABEL: finite_f32_estimate:
+; AVX512-LABEL: finite_f32_estimate_daz:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vrsqrtss %xmm0, %xmm0, %xmm1
 ; AVX512-NEXT:    vmulss %xmm1, %xmm0, %xmm2
@@ -516,4 +563,4 @@ attributes #0 = { "unsafe-fp-math"="true" "reciprocal-estimates"="!sqrtf,!vec-sq
 attributes #1 = { "unsafe-fp-math"="true" "reciprocal-estimates"="sqrt,vec-sqrt" }
 attributes #2 = { nounwind readnone }
 attributes #3 = { "unsafe-fp-math"="true" "reciprocal-estimates"="sqrt,vec-sqrt" "denormal-fp-math"="ieee" }
-
+attributes #4 = { "unsafe-fp-math"="true" "reciprocal-estimates"="sqrt,vec-sqrt" "denormal-fp-math"="ieee,preserve-sign" }


        


More information about the llvm-commits mailing list