[llvm] [SelectionDAG] Remove UnsafeFPMath check in `visitFADDForFMACombine` (PR #127770)
via llvm-commits
llvm-commits@lists.llvm.org
Tue Jun 17 21:42:11 PDT 2025
https://github.com/paperchalice updated https://github.com/llvm/llvm-project/pull/127770
From 0e12887499ad22480fbd09bda83180b8f4fc0a51 Mon Sep 17 00:00:00 2001
From: PaperChalice <liujunchang97@outlook.com>
Date: Wed, 18 Jun 2025 09:59:33 +0800
Subject: [PATCH] [SelectionDAG] Remove UnsafeFPMath check in
visitFADDForFMACombine
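
Forming an FMA out of separate fmul/fadd nodes in visitFADDForFMACombine no
longer keys off the global UnsafeFPMath flag: fusion is allowed when the
target requests fast FP op fusion (-fp-contract=fast), when the target has a
legal FMAD, or when the nodes themselves carry the contract fast-math flag.
A minimal IR sketch of the intended behavior (hypothetical functions, not
part of this patch):

  define double @still_fuses(double %a, double %b, double %c) {
    %m = fmul contract double %a, %b
    %r = fadd contract double %m, %c   ; eligible for fma formation
    ret double %r
  }

  define double @no_longer_fuses(double %a, double %b, double %c) {
    %m = fmul double %a, %b
    %r = fadd double %m, %c   ; not fused merely because -enable-unsafe-fp-math is set
    ret double %r
  }

Tests are updated accordingly: RUN lines drop -enable-unsafe-fp-math (or gain
-fp-contract=fast where fused output is still the point of the test), and
per-instruction fast-math flags replace the "unsafe-fp-math"="true" function
attribute.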
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 21 +-
.../CodeGen/AArch64/arm64-fp-contract-zero.ll | 10 +-
llvm/test/CodeGen/AMDGPU/fdot2.ll | 237 +++++++++++++++---
llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll | 6 +-
llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll | 95 ++++---
llvm/test/CodeGen/NVPTX/fma-relu-contract.ll | 21 +-
llvm/test/CodeGen/PowerPC/fmf-propagation.ll | 4 +-
.../PowerPC/vsx-fma-mutate-trivial-copy.ll | 4 +-
llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll | 6 +-
llvm/test/CodeGen/X86/sqrt-fastmath.ll | 13 +-
10 files changed, 307 insertions(+), 110 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index aba3c0f80a024..a9d94090cbcf4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -16619,8 +16619,8 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
if (!HasFMAD && !HasFMA)
return SDValue();
- bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
- Options.UnsafeFPMath || HasFMAD);
+ bool AllowFusionGlobally =
+ Options.AllowFPOpFusion == FPOpFusion::Fast || HasFMAD;
// If the addition is not contractable, do not combine.
if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
return SDValue();
@@ -17826,6 +17826,7 @@ template <class MatchContextClass> SDValue DAGCombiner::visitFMA(SDNode *N) {
SDValue N2 = N->getOperand(2);
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2);
EVT VT = N->getValueType(0);
SDLoc DL(N);
const TargetOptions &Options = DAG.getTarget().Options;
@@ -17855,11 +17856,17 @@ template <class MatchContextClass> SDValue DAGCombiner::visitFMA(SDNode *N) {
}
// FIXME: use fast math flags instead of Options.UnsafeFPMath
- if (Options.UnsafeFPMath) {
- if (N0CFP && N0CFP->isZero())
- return N2;
- if (N1CFP && N1CFP->isZero())
- return N2;
+ // TODO: Finally migrate away from global TargetOptions.
+ if (Options.AllowFPOpFusion == FPOpFusion::Fast ||
+ (Options.NoNaNsFPMath && Options.NoInfsFPMath) ||
+ (N->getFlags().hasNoNaNs() && N->getFlags().hasNoInfs())) {
+ if (Options.NoSignedZerosFPMath || N->getFlags().hasNoSignedZeros() ||
+ (N2CFP && !N2CFP->isExactlyValue(-0.0))) {
+ if (N0CFP && N0CFP->isZero())
+ return N2;
+ if (N1CFP && N1CFP->isZero())
+ return N2;
+ }
}
// FIXME: Support splat of constant.
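
The visitFMA change above also narrows the old UnsafeFPMath-gated fold of
fma(+-0.0, x, z) -> z. That fold is unsound when x may be infinity or NaN
(0 * Inf is NaN), and it can flip the sign of zero when z is -0.0
(fma(+0.0, x, -0.0) can be +0.0). The new guard therefore requires no-NaNs
and no-infs (globally, as node flags, or via fast FP op fusion), plus
no-signed-zeros unless the addend is a constant other than -0.0. A hedged
sketch of IR that keeps the fold legal (illustrative, not taken from this
patch):

  declare double @llvm.fma.f64(double, double, double)

  define double @fold_to_addend(double %x, double %z) {
    ; with nnan, ninf and nsz the call may fold to %z
    %r = call nnan ninf nsz double @llvm.fma.f64(double 0.000000e+00, double %x, double %z)
    ret double %r
  }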
diff --git a/llvm/test/CodeGen/AArch64/arm64-fp-contract-zero.ll b/llvm/test/CodeGen/AArch64/arm64-fp-contract-zero.ll
index 9a753748a29ef..53e69b985cafb 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fp-contract-zero.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fp-contract-zero.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=arm64 -fp-contract=fast -o - %s | FileCheck %s
+; RUN: llc -mtriple=arm64 -o - %s | FileCheck %s
; Make sure we don't try to fold an fneg into +0.0, creating an illegal constant
@@ -7,12 +7,10 @@
define double @test_fms_fold(double %a, double %b) {
; CHECK-LABEL: test_fms_fold:
; CHECK: // %bb.0:
-; CHECK-NEXT: movi d2, #0000000000000000
-; CHECK-NEXT: fmul d1, d1, d2
-; CHECK-NEXT: fnmsub d0, d0, d2, d1
+; CHECK-NEXT: movi {{d[0-9]+}}, #0000000000000000
; CHECK-NEXT: ret
- %mul = fmul double %a, 0.000000e+00
- %mul1 = fmul double %b, 0.000000e+00
+ %mul = fmul fast double %a, 0.000000e+00
+ %mul1 = fmul fast double %b, 0.000000e+00
%sub = fsub double %mul, %mul1
ret double %sub
}
diff --git a/llvm/test/CodeGen/AMDGPU/fdot2.ll b/llvm/test/CodeGen/AMDGPU/fdot2.ll
index 776816d6aa0e3..18281f91dbb8b 100644
--- a/llvm/test/CodeGen/AMDGPU/fdot2.ll
+++ b/llvm/test/CodeGen/AMDGPU/fdot2.ll
@@ -1,28 +1,53 @@
-; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -denormal-fp-math-f32=preserve-sign -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX900
-; RUN: llc -mtriple=amdgcn -mcpu=gfx906 -denormal-fp-math-f32=preserve-sign -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GCN-DL-UNSAFE,GFX906-DL-UNSAFE
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1011 -denormal-fp-math-f32=preserve-sign -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GCN-DL-UNSAFE,GFX10-DL-UNSAFE,GFX10-CONTRACT
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1012 -denormal-fp-math-f32=preserve-sign -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GCN-DL-UNSAFE,GFX10-DL-UNSAFE,GFX10-CONTRACT
+; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX900
+; RUN: llc -mtriple=amdgcn -mcpu=gfx906 -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GCN-DL-UNSAFE,GFX906-DL-UNSAFE
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1011 -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GCN-DL-UNSAFE,GFX10-DL-UNSAFE,GFX10-CONTRACT
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1012 -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GCN-DL-UNSAFE,GFX10-DL-UNSAFE,GFX10-CONTRACT
; RUN: llc -mtriple=amdgcn -mcpu=gfx906 -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX906
; RUN: llc -mtriple=amdgcn -mcpu=gfx906 -denormal-fp-math=preserve-sign -fp-contract=fast -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX906-CONTRACT
; RUN: llc -mtriple=amdgcn -mcpu=gfx906 -denormal-fp-math=ieee -fp-contract=fast -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX906-DENORM-CONTRACT
-; RUN: llc -mtriple=amdgcn -mcpu=gfx906 -denormal-fp-math-f32=preserve-sign -enable-unsafe-fp-math -mattr="+dot7-insts,-dot10-insts" -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX906-DOT10-DISABLED
+; RUN: llc -mtriple=amdgcn -mcpu=gfx906 -denormal-fp-math-f32=preserve-sign -mattr="+dot7-insts,-dot10-insts" -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX906-DOT10-DISABLED
; (fadd (fmul S1.x, S2.x), (fadd (fmul (S1.y, S2.y), z))) -> (fdot2 S1, S2, z)
; Tests to make sure fdot2 is not generated when vector elements of dot-product expressions
; are not converted from f16 to f32.
-; GCN-LABEL: {{^}}dotproduct_f16
+; GCN-LABEL: {{^}}dotproduct_f16_contract
; GFX900: v_fma_f16
; GFX900: v_fma_f16
-; GFX906: v_mul_f16_e32
-; GFX906: v_mul_f16_e32
-
; GFX906-DL-UNSAFE: v_fma_f16
; GFX10-CONTRACT: v_fmac_f16
; GFX906-CONTRACT: v_mac_f16_e32
; GFX906-DENORM-CONTRACT: v_fma_f16
; GFX906-DOT10-DISABLED: v_fma_f16
+
+define amdgpu_kernel void @dotproduct_f16_contract(ptr addrspace(1) %src1,
+ ptr addrspace(1) %src2,
+ ptr addrspace(1) nocapture %dst) {
+entry:
+ %src1.vec = load <2 x half>, ptr addrspace(1) %src1
+ %src2.vec = load <2 x half>, ptr addrspace(1) %src2
+
+ %src1.el1 = extractelement <2 x half> %src1.vec, i64 0
+ %src2.el1 = extractelement <2 x half> %src2.vec, i64 0
+
+ %src1.el2 = extractelement <2 x half> %src1.vec, i64 1
+ %src2.el2 = extractelement <2 x half> %src2.vec, i64 1
+
+ %mul2 = fmul fast half %src1.el2, %src2.el2
+ %mul1 = fmul fast half %src1.el1, %src2.el1
+ %acc = load half, ptr addrspace(1) %dst, align 2
+ %acc1 = fadd fast half %mul2, %acc
+ %acc2 = fadd fast half %mul1, %acc1
+ store half %acc2, ptr addrspace(1) %dst, align 2
+ ret void
+}
+
+; GCN-LABEL: {{^}}dotproduct_f16
+
+; GFX906: v_mul_f16_e32
+; GFX906: v_mul_f16_e32
+
define amdgpu_kernel void @dotproduct_f16(ptr addrspace(1) %src1,
ptr addrspace(1) %src2,
ptr addrspace(1) nocapture %dst) {
@@ -45,18 +70,12 @@ entry:
ret void
}
-
; We only want to generate fdot2 if:
; - vector element of dot product is converted from f16 to f32, and
; - the vectors are of type <2 x half>, and
; - "dot10-insts" is enabled
-; GCN-LABEL: {{^}}dotproduct_f16_f32
-; GFX900: v_mad_mix_f32
-; GFX900: v_mad_mix_f32
-
-; GFX906: v_mad_f32
-; GFX906: v_mac_f32_e32
+; GCN-LABEL: {{^}}dotproduct_f16_f32_contract
; GFX906-DL-UNSAFE: v_dot2_f32_f16
; GFX10-DL-UNSAFE: v_dot2c_f32_f16
@@ -65,6 +84,39 @@ entry:
; GFX906-DENORM-CONTRACT: v_dot2_f32_f16
; GFX906-DOT10-DISABLED: v_fma_mix_f32
+define amdgpu_kernel void @dotproduct_f16_f32_contract(ptr addrspace(1) %src1,
+ ptr addrspace(1) %src2,
+ ptr addrspace(1) nocapture %dst) {
+entry:
+ %src1.vec = load <2 x half>, ptr addrspace(1) %src1
+ %src2.vec = load <2 x half>, ptr addrspace(1) %src2
+
+ %src1.el1 = extractelement <2 x half> %src1.vec, i64 0
+ %csrc1.el1 = fpext half %src1.el1 to float
+ %src2.el1 = extractelement <2 x half> %src2.vec, i64 0
+ %csrc2.el1 = fpext half %src2.el1 to float
+
+ %src1.el2 = extractelement <2 x half> %src1.vec, i64 1
+ %csrc1.el2 = fpext half %src1.el2 to float
+ %src2.el2 = extractelement <2 x half> %src2.vec, i64 1
+ %csrc2.el2 = fpext half %src2.el2 to float
+
+ %mul2 = fmul fast float %csrc1.el2, %csrc2.el2
+ %mul1 = fmul fast float %csrc1.el1, %csrc2.el1
+ %acc = load float, ptr addrspace(1) %dst, align 4
+ %acc1 = fadd fast float %mul2, %acc
+ %acc2 = fadd fast float %mul1, %acc1
+ store float %acc2, ptr addrspace(1) %dst, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}dotproduct_f16_f32
+; GFX900: v_mad_mix_f32
+; GFX900: v_mad_mix_f32
+
+; GFX906: v_mad_f32
+; GFX906: v_mac_f32_e32
+
define amdgpu_kernel void @dotproduct_f16_f32(ptr addrspace(1) %src1,
ptr addrspace(1) %src2,
ptr addrspace(1) nocapture %dst) {
@@ -96,6 +148,39 @@ entry:
; - the vectors are of type <2 x half>, and
; - "dot10-insts" is enabled
+; GCN-LABEL: {{^}}dotproduct_diffvecorder_contract
+; GFX906-DL-UNSAFE: v_dot2_f32_f16
+; GFX10-DL-UNSAFE: v_dot2c_f32_f16
+
+; GFX906-CONTRACT: v_dot2_f32_f16
+; GFX906-DENORM-CONTRACT: v_dot2_f32_f16
+; GFX906-DOT10-DISABLED: v_fma_mix_f32
+define amdgpu_kernel void @dotproduct_diffvecorder_contract(ptr addrspace(1) %src1,
+ ptr addrspace(1) %src2,
+ ptr addrspace(1) nocapture %dst) {
+entry:
+ %src1.vec = load <2 x half>, ptr addrspace(1) %src1
+ %src2.vec = load <2 x half>, ptr addrspace(1) %src2
+
+ %src1.el1 = extractelement <2 x half> %src1.vec, i64 0
+ %csrc1.el1 = fpext half %src1.el1 to float
+ %src2.el1 = extractelement <2 x half> %src2.vec, i64 0
+ %csrc2.el1 = fpext half %src2.el1 to float
+
+ %src1.el2 = extractelement <2 x half> %src1.vec, i64 1
+ %csrc1.el2 = fpext half %src1.el2 to float
+ %src2.el2 = extractelement <2 x half> %src2.vec, i64 1
+ %csrc2.el2 = fpext half %src2.el2 to float
+
+ %mul2 = fmul fast float %csrc2.el2, %csrc1.el2
+ %mul1 = fmul fast float %csrc1.el1, %csrc2.el1
+ %acc = load float, ptr addrspace(1) %dst, align 4
+ %acc1 = fadd fast float %mul2, %acc
+ %acc2 = fadd fast float %mul1, %acc1
+ store float %acc2, ptr addrspace(1) %dst, align 4
+ ret void
+}
+
; GCN-LABEL: {{^}}dotproduct_diffvecorder
; GFX900: v_mad_mix_f32
; GFX900: v_mad_mix_f32
@@ -103,12 +188,6 @@ entry:
; GFX906: v_mad_f32
; GFX906: v_mac_f32_e32
-; GFX906-DL-UNSAFE: v_dot2_f32_f16
-; GFX10-DL-UNSAFE: v_dot2c_f32_f16
-
-; GFX906-CONTRACT: v_dot2_f32_f16
-; GFX906-DENORM-CONTRACT: v_dot2_f32_f16
-; GFX906-DOT10-DISABLED: v_fma_mix_f32
define amdgpu_kernel void @dotproduct_diffvecorder(ptr addrspace(1) %src1,
ptr addrspace(1) %src2,
ptr addrspace(1) nocapture %dst) {
@@ -136,17 +215,45 @@ entry:
}
; Tests to make sure dot product is not generated when the vectors are not of <2 x half>.
-; GCN-LABEL: {{^}}dotproduct_v4f16
-; GFX900: v_mad_mix_f32
-
-; GFX906: v_mad_f32
-; GFX906: v_mac_f32_e32
+; GCN-LABEL: {{^}}dotproduct_v4f16_contract
; GCN-DL-UNSAFE: v_fma_mix_f32
; GFX906-CONTRACT: v_fma_mix_f32
; GFX906-DENORM-CONTRACT: v_fma_mix_f32
; GFX906-DOT10-DISABLED: v_fma_mix_f32
+define amdgpu_kernel void @dotproduct_v4f16_contract(ptr addrspace(1) %src1,
+ ptr addrspace(1) %src2,
+ ptr addrspace(1) nocapture %dst) {
+entry:
+ %src1.vec = load <4 x half>, ptr addrspace(1) %src1
+ %src2.vec = load <4 x half>, ptr addrspace(1) %src2
+
+ %src1.el1 = extractelement <4 x half> %src1.vec, i64 0
+ %csrc1.el1 = fpext half %src1.el1 to float
+ %src2.el1 = extractelement <4 x half> %src2.vec, i64 0
+ %csrc2.el1 = fpext half %src2.el1 to float
+
+ %src1.el2 = extractelement <4 x half> %src1.vec, i64 1
+ %csrc1.el2 = fpext half %src1.el2 to float
+ %src2.el2 = extractelement <4 x half> %src2.vec, i64 1
+ %csrc2.el2 = fpext half %src2.el2 to float
+
+ %mul2 = fmul fast float %csrc1.el2, %csrc2.el2
+ %mul1 = fmul float %csrc1.el1, %csrc2.el1
+ %acc = load float, ptr addrspace(1) %dst, align 4
+ %acc1 = fadd fast float %mul2, %acc
+ %acc2 = fadd fast float %mul1, %acc1
+ store float %acc2, ptr addrspace(1) %dst, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}dotproduct_v4f16
+; GFX900: v_mad_mix_f32
+
+; GFX906: v_mad_f32
+; GFX906: v_mac_f32_e32
+
define amdgpu_kernel void @dotproduct_v4f16(ptr addrspace(1) %src1,
ptr addrspace(1) %src2,
ptr addrspace(1) nocapture %dst) {
@@ -173,6 +280,39 @@ entry:
ret void
}
+; GCN-LABEL: {{^}}NotAdotproductContract
+
+; GCN-DL-UNSAFE: v_fma_mix_f32
+
+; GFX906-CONTRACT: v_fma_mix_f32
+; GFX906-DENORM-CONTRACT: v_fma_mix_f32
+; GFX906-DOT10-DISABLED: v_fma_mix_f32
+define amdgpu_kernel void @NotAdotproductContract(ptr addrspace(1) %src1,
+ ptr addrspace(1) %src2,
+ ptr addrspace(1) nocapture %dst) {
+entry:
+ %src1.vec = load <2 x half>, ptr addrspace(1) %src1
+ %src2.vec = load <2 x half>, ptr addrspace(1) %src2
+
+ %src1.el1 = extractelement <2 x half> %src1.vec, i64 0
+ %csrc1.el1 = fpext half %src1.el1 to float
+ %src2.el1 = extractelement <2 x half> %src2.vec, i64 0
+ %csrc2.el1 = fpext half %src2.el1 to float
+
+ %src1.el2 = extractelement <2 x half> %src1.vec, i64 1
+ %csrc1.el2 = fpext half %src1.el2 to float
+ %src2.el2 = extractelement <2 x half> %src2.vec, i64 1
+ %csrc2.el2 = fpext half %src2.el2 to float
+
+ %mul2 = fmul fast float %csrc1.el2, %csrc1.el1
+ %mul1 = fmul fast float %csrc2.el1, %csrc2.el2
+ %acc = load float, ptr addrspace(1) %dst, align 4
+ %acc1 = fadd fast float %mul2, %acc
+ %acc2 = fadd fast float %mul1, %acc1
+ store float %acc2, ptr addrspace(1) %dst, align 4
+ ret void
+}
+
; GCN-LABEL: {{^}}NotAdotproduct
; GFX900: v_mad_mix_f32
; GFX900: v_mad_mix_f32
@@ -180,11 +320,6 @@ entry:
; GFX906: v_mad_f32
; GFX906: v_mac_f32_e32
-; GCN-DL-UNSAFE: v_fma_mix_f32
-
-; GFX906-CONTRACT: v_fma_mix_f32
-; GFX906-DENORM-CONTRACT: v_fma_mix_f32
-; GFX906-DOT10-DISABLED: v_fma_mix_f32
define amdgpu_kernel void @NotAdotproduct(ptr addrspace(1) %src1,
ptr addrspace(1) %src2,
ptr addrspace(1) nocapture %dst) {
@@ -211,6 +346,39 @@ entry:
ret void
}
+; GCN-LABEL: {{^}}Diff_Idx_NotAdotproductContract
+
+; GCN-DL-UNSAFE: v_fma_mix_f32
+
+; GFX906-CONTRACT: v_fma_mix_f32
+; GFX906-DENORM-CONTRACT: v_fma_mix_f32
+; GFX906-DOT10-DISABLED: v_fma_mix_f32
+define amdgpu_kernel void @Diff_Idx_NotAdotproductContract(ptr addrspace(1) %src1,
+ ptr addrspace(1) %src2,
+ ptr addrspace(1) nocapture %dst) {
+entry:
+ %src1.vec = load <2 x half>, ptr addrspace(1) %src1
+ %src2.vec = load <2 x half>, ptr addrspace(1) %src2
+
+ %src1.el1 = extractelement <2 x half> %src1.vec, i64 0
+ %csrc1.el1 = fpext half %src1.el1 to float
+ %src2.el1 = extractelement <2 x half> %src2.vec, i64 0
+ %csrc2.el1 = fpext half %src2.el1 to float
+
+ %src1.el2 = extractelement <2 x half> %src1.vec, i64 1
+ %csrc1.el2 = fpext half %src1.el2 to float
+ %src2.el2 = extractelement <2 x half> %src2.vec, i64 1
+ %csrc2.el2 = fpext half %src2.el2 to float
+
+ %mul2 = fmul fast float %csrc1.el2, %csrc2.el1
+ %mul1 = fmul fast float %csrc1.el1, %csrc2.el2
+ %acc = load float, ptr addrspace(1) %dst, align 4
+ %acc1 = fadd fast float %mul2, %acc
+ %acc2 = fadd fast float %mul1, %acc1
+ store float %acc2, ptr addrspace(1) %dst, align 4
+ ret void
+}
+
; GCN-LABEL: {{^}}Diff_Idx_NotAdotproduct
; GFX900: v_mad_mix_f32
; GFX900: v_mad_mix_f32
@@ -218,11 +386,6 @@ entry:
; GFX906: v_mad_f32
; GFX906: v_mac_f32_e32
-; GCN-DL-UNSAFE: v_fma_mix_f32
-
-; GFX906-CONTRACT: v_fma_mix_f32
-; GFX906-DENORM-CONTRACT: v_fma_mix_f32
-; GFX906-DOT10-DISABLED: v_fma_mix_f32
define amdgpu_kernel void @Diff_Idx_NotAdotproduct(ptr addrspace(1) %src1,
ptr addrspace(1) %src2,
ptr addrspace(1) nocapture %dst) {
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll
index f199db3ca12ca..462d7748b86cd 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll
@@ -832,9 +832,9 @@ define amdgpu_ps half @fneg_fadd_0_nsz_f16(half inreg %tmp2, half inreg %tmp6, <
; GFX11-NSZ-TRUE16-NEXT: ; return to shader part epilog
.entry:
%tmp7 = fdiv afn half 1.000000e+00, %tmp6
- %tmp8 = fmul half 0.000000e+00, %tmp7
+ %tmp8 = fmul contract half 0.000000e+00, %tmp7
%tmp9 = fmul reassoc nnan arcp contract half 0.000000e+00, %tmp8
- %.i188 = fadd half %tmp9, 0.000000e+00
+ %.i188 = fadd nnan ninf contract half %tmp9, 0.000000e+00
%tmp10 = fcmp uge half %.i188, %tmp2
%tmp11 = fneg half %.i188
%.i092 = select i1 %tmp10, half %tmp2, half %tmp11
@@ -6258,7 +6258,7 @@ declare <4 x half> @llvm.fmuladd.v4f16(<4 x half>, <4 x half>, <4 x half>) #1
attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind "unsafe-fp-math"="true" }
+attributes #2 = { nounwind }
attributes #3 = { nounwind "no-signed-zeros-fp-math"="true" }
attributes #4 = { nounwind "amdgpu-ieee"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
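
In the fneg tests the blanket "unsafe-fp-math"="true" attribute on #2 is
dropped; the instructions the combines rely on now carry explicit fast-math
flags instead, as in the updated %tmp8 and %.i188 lines above. A rough sketch
of the substitution (hypothetical function, not from the test file):

  ; previously enabled for the whole function by "unsafe-fp-math"="true";
  ; now expressed per instruction:
  define half @flag_based(half %a, half %b) {
    %m = fmul contract half %a, %b
    %r = fadd nnan ninf contract half %m, 0.000000e+00
    ret half %r
  }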
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
index 46da9d33639b6..dc9942b7274ea 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
@@ -1,9 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn -mcpu=hawaii -mattr=+flat-for-global < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GCN-SAFE,SI,SI-SAFE %s
-; RUN: llc -enable-no-signed-zeros-fp-math -mtriple=amdgcn -mcpu=hawaii -mattr=+flat-for-global < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GCN-NSZ,SI,SI-NSZ %s
+; RUN: llc -enable-no-signed-zeros-fp-math -mtriple=amdgcn -mcpu=hawaii -mattr=+flat-for-global -fp-contract=fast < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GCN-NSZ,SI,SI-NSZ %s
; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GCN-SAFE,VI,VI-SAFE %s
-; RUN: llc -enable-no-signed-zeros-fp-math -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GCN-NSZ,VI,VI-NSZ %s
+; RUN: llc -enable-no-signed-zeros-fp-math -mtriple=amdgcn -mcpu=fiji -fp-contract=fast < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GCN-NSZ,VI,VI-NSZ %s
; --------------------------------------------------------------------------------
; fadd tests
@@ -289,14 +289,18 @@ define amdgpu_ps float @fneg_fadd_0_f32(float inreg %tmp2, float inreg %tmp6, <4
; function attribute unsafe-fp-math automatically. Combine with the previous test
; when that is done.
define amdgpu_ps float @fneg_fadd_0_nsz_f32(float inreg %tmp2, float inreg %tmp6, <4 x i32> %arg) #2 {
-; SI-SAFE-LABEL: fneg_fadd_0_nsz_f32:
-; SI-SAFE: ; %bb.0: ; %.entry
-; SI-SAFE-NEXT: v_min_legacy_f32_e64 v0, 0, s0
-; SI-SAFE-NEXT: s_brev_b32 s0, 1
-; SI-SAFE-NEXT: v_mov_b32_e32 v1, 0x7fc00000
-; SI-SAFE-NEXT: v_cmp_ngt_f32_e32 vcc, s0, v0
-; SI-SAFE-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
-; SI-SAFE-NEXT: ; return to shader part epilog
+; GCN-SAFE-LABEL: fneg_fadd_0_nsz_f32:
+; GCN-SAFE: ; %bb.0: ; %.entry
+; GCN-SAFE-NEXT: v_rcp_f32_e32 v0, s1
+; GCN-SAFE-NEXT: v_mov_b32_e32 v1, s0
+; GCN-SAFE-NEXT: v_mul_f32_e32 v0, 0, v0
+; GCN-SAFE-NEXT: v_add_f32_e32 v0, 0, v0
+; GCN-SAFE-NEXT: v_cmp_ngt_f32_e32 vcc, s0, v0
+; GCN-SAFE-NEXT: v_cndmask_b32_e64 v0, -v0, v1, vcc
+; GCN-SAFE-NEXT: v_mov_b32_e32 v1, 0x7fc00000
+; GCN-SAFE-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0
+; GCN-SAFE-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; GCN-SAFE-NEXT: ; return to shader part epilog
;
; GCN-NSZ-LABEL: fneg_fadd_0_nsz_f32:
; GCN-NSZ: ; %bb.0: ; %.entry
@@ -309,19 +313,6 @@ define amdgpu_ps float @fneg_fadd_0_nsz_f32(float inreg %tmp2, float inreg %tmp6
; GCN-NSZ-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0
; GCN-NSZ-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
; GCN-NSZ-NEXT: ; return to shader part epilog
-;
-; VI-SAFE-LABEL: fneg_fadd_0_nsz_f32:
-; VI-SAFE: ; %bb.0: ; %.entry
-; VI-SAFE-NEXT: v_rcp_f32_e32 v0, s1
-; VI-SAFE-NEXT: v_mov_b32_e32 v1, s0
-; VI-SAFE-NEXT: v_mul_f32_e32 v0, 0, v0
-; VI-SAFE-NEXT: v_add_f32_e32 v0, 0, v0
-; VI-SAFE-NEXT: v_cmp_ngt_f32_e32 vcc, s0, v0
-; VI-SAFE-NEXT: v_cndmask_b32_e64 v0, -v0, v1, vcc
-; VI-SAFE-NEXT: v_mov_b32_e32 v1, 0x7fc00000
-; VI-SAFE-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0
-; VI-SAFE-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
-; VI-SAFE-NEXT: ; return to shader part epilog
.entry:
%tmp7 = fdiv afn float 1.000000e+00, %tmp6
%tmp8 = fmul float 0.000000e+00, %tmp7
@@ -672,17 +663,28 @@ define amdgpu_ps double @fneg_fadd_0_f64(double inreg %tmp2, double inreg %tmp6,
; function attribute unsafe-fp-math automatically. Combine with the previous test
; when that is done.
define amdgpu_ps double @fneg_fadd_0_nsz_f64(double inreg %tmp2, double inreg %tmp6, <4 x i32> %arg) #2 {
-; GCN-SAFE-LABEL: fneg_fadd_0_nsz_f64:
-; GCN-SAFE: ; %bb.0: ; %.entry
-; GCN-SAFE-NEXT: v_cmp_ngt_f64_e64 s[2:3], s[0:1], 0
-; GCN-SAFE-NEXT: s_and_b64 s[2:3], s[2:3], exec
-; GCN-SAFE-NEXT: s_cselect_b32 s1, s1, 0x80000000
-; GCN-SAFE-NEXT: s_cselect_b32 s0, s0, 0
-; GCN-SAFE-NEXT: v_cmp_ngt_f64_e64 s[0:1], s[0:1], 0
-; GCN-SAFE-NEXT: s_and_b64 s[0:1], s[0:1], exec
-; GCN-SAFE-NEXT: s_cselect_b32 s1, 0, 0x7ff80000
-; GCN-SAFE-NEXT: s_mov_b32 s0, 0
-; GCN-SAFE-NEXT: ; return to shader part epilog
+; SI-SAFE-LABEL: fneg_fadd_0_nsz_f64:
+; SI-SAFE: ; %bb.0: ; %.entry
+; SI-SAFE-NEXT: v_rcp_f64_e32 v[0:1], s[2:3]
+; SI-SAFE-NEXT: v_fma_f64 v[2:3], -s[2:3], v[0:1], 1.0
+; SI-SAFE-NEXT: v_fma_f64 v[0:1], v[2:3], v[0:1], v[0:1]
+; SI-SAFE-NEXT: v_fma_f64 v[2:3], -s[2:3], v[0:1], 1.0
+; SI-SAFE-NEXT: v_fma_f64 v[0:1], v[2:3], v[0:1], v[0:1]
+; SI-SAFE-NEXT: v_fma_f64 v[2:3], -s[2:3], v[0:1], 1.0
+; SI-SAFE-NEXT: v_fma_f64 v[0:1], v[2:3], v[0:1], v[0:1]
+; SI-SAFE-NEXT: v_mov_b32_e32 v2, s1
+; SI-SAFE-NEXT: v_mul_f64 v[0:1], v[0:1], 0
+; SI-SAFE-NEXT: v_mov_b32_e32 v3, s0
+; SI-SAFE-NEXT: v_add_f64 v[0:1], v[0:1], 0
+; SI-SAFE-NEXT: v_cmp_ngt_f64_e32 vcc, s[0:1], v[0:1]
+; SI-SAFE-NEXT: v_xor_b32_e32 v4, 0x80000000, v1
+; SI-SAFE-NEXT: v_cndmask_b32_e32 v1, v4, v2, vcc
+; SI-SAFE-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
+; SI-SAFE-NEXT: v_cmp_nlt_f64_e32 vcc, 0, v[0:1]
+; SI-SAFE-NEXT: s_and_b64 s[0:1], vcc, exec
+; SI-SAFE-NEXT: s_cselect_b32 s1, 0, 0x7ff80000
+; SI-SAFE-NEXT: s_mov_b32 s0, 0
+; SI-SAFE-NEXT: ; return to shader part epilog
;
; SI-NSZ-LABEL: fneg_fadd_0_nsz_f64:
; SI-NSZ: ; %bb.0: ; %.entry
@@ -707,6 +709,29 @@ define amdgpu_ps double @fneg_fadd_0_nsz_f64(double inreg %tmp2, double inreg %t
; SI-NSZ-NEXT: s_mov_b32 s0, 0
; SI-NSZ-NEXT: ; return to shader part epilog
;
+; VI-SAFE-LABEL: fneg_fadd_0_nsz_f64:
+; VI-SAFE: ; %bb.0: ; %.entry
+; VI-SAFE-NEXT: v_rcp_f64_e32 v[0:1], s[2:3]
+; VI-SAFE-NEXT: v_mov_b32_e32 v4, s0
+; VI-SAFE-NEXT: v_fma_f64 v[2:3], -s[2:3], v[0:1], 1.0
+; VI-SAFE-NEXT: v_fma_f64 v[0:1], v[2:3], v[0:1], v[0:1]
+; VI-SAFE-NEXT: v_fma_f64 v[2:3], -s[2:3], v[0:1], 1.0
+; VI-SAFE-NEXT: v_fma_f64 v[0:1], v[2:3], v[0:1], v[0:1]
+; VI-SAFE-NEXT: v_fma_f64 v[2:3], -s[2:3], v[0:1], 1.0
+; VI-SAFE-NEXT: v_fma_f64 v[0:1], v[2:3], v[0:1], v[0:1]
+; VI-SAFE-NEXT: v_mov_b32_e32 v2, s1
+; VI-SAFE-NEXT: v_mul_f64 v[0:1], v[0:1], 0
+; VI-SAFE-NEXT: v_add_f64 v[0:1], v[0:1], 0
+; VI-SAFE-NEXT: v_cmp_ngt_f64_e32 vcc, s[0:1], v[0:1]
+; VI-SAFE-NEXT: v_xor_b32_e32 v3, 0x80000000, v1
+; VI-SAFE-NEXT: v_cndmask_b32_e32 v1, v3, v2, vcc
+; VI-SAFE-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
+; VI-SAFE-NEXT: v_cmp_nlt_f64_e32 vcc, 0, v[0:1]
+; VI-SAFE-NEXT: s_and_b64 s[0:1], vcc, exec
+; VI-SAFE-NEXT: s_cselect_b32 s1, 0, 0x7ff80000
+; VI-SAFE-NEXT: s_mov_b32 s0, 0
+; VI-SAFE-NEXT: ; return to shader part epilog
+;
; VI-NSZ-LABEL: fneg_fadd_0_nsz_f64:
; VI-NSZ: ; %bb.0: ; %.entry
; VI-NSZ-NEXT: v_rcp_f64_e32 v[0:1], s[2:3]
@@ -4602,6 +4627,6 @@ declare half @llvm.amdgcn.rcp.f16(half) #1
attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind "unsafe-fp-math"="true" }
+attributes #2 = { nounwind }
attributes #3 = { nounwind "no-signed-zeros-fp-math"="true" }
attributes #4 = { nounwind "amdgpu-ieee"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
diff --git a/llvm/test/CodeGen/NVPTX/fma-relu-contract.ll b/llvm/test/CodeGen/NVPTX/fma-relu-contract.ll
index 9c11f169a89df..b10a740bf003b 100644
--- a/llvm/test/CodeGen/NVPTX/fma-relu-contract.ll
+++ b/llvm/test/CodeGen/NVPTX/fma-relu-contract.ll
@@ -1,13 +1,13 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_80 | FileCheck %s
-; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_80 | %ptxas-verify -arch=sm_80 %}
+; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_80 -fp-contract=fast | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_80 -fp-contract=fast | %ptxas-verify -arch=sm_80 %}
; Using FTZ should emit fma.ftz.relu for f16, not for bf16
-; RUN: llc < %s -denormal-fp-math-f32=preserve-sign -mtriple=nvptx64 -mcpu=sm_80 | FileCheck %s --check-prefixes=CHECK-FTZ
-; RUN: %if ptxas %{ llc < %s -denormal-fp-math-f32=preserve-sign -mtriple=nvptx64 -mcpu=sm_80 | %ptxas-verify -arch=sm_80 %}
+; RUN: llc < %s -denormal-fp-math-f32=preserve-sign -mtriple=nvptx64 -mcpu=sm_80 -fp-contract=fast | FileCheck %s --check-prefixes=CHECK-FTZ
+; RUN: %if ptxas %{ llc < %s -denormal-fp-math-f32=preserve-sign -fp-contract=fast -mtriple=nvptx64 -mcpu=sm_80 | %ptxas-verify -arch=sm_80 %}
; SM < 80 or (which needs PTX version >= 70) should not emit fma{.ftz}.relu
-; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_70 | FileCheck %s --check-prefixes=CHECK-SM70
+; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_70 -fp-contract=fast | FileCheck %s --check-prefixes=CHECK-SM70
define half @fma_f16_expanded_no_nans(half %a, half %b, half %c) #0 {
; CHECK-LABEL: fma_f16_expanded_no_nans(
@@ -119,7 +119,7 @@ define half @fma_f16_expanded_no_nans_multiple_uses_of_fma(half %a, half %b, hal
ret half %6
}
-define half @fma_f16_expanded_unsafe_with_nans(half %a, half %b, half %c) #1 {
+define half @fma_f16_expanded_unsafe_with_nans(half %a, half %b, half %c) {
; CHECK-LABEL: fma_f16_expanded_unsafe_with_nans(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<7>;
@@ -216,7 +216,7 @@ define half @fma_f16_expanded_maxnum_no_nans(half %a, half %b, half %c) #0 {
ret half %3
}
-define bfloat @fma_bf16_expanded_unsafe_with_nans(bfloat %a, bfloat %b, bfloat %c) #1 {
+define bfloat @fma_bf16_expanded_unsafe_with_nans(bfloat %a, bfloat %b, bfloat %c) {
; CHECK-LABEL: fma_bf16_expanded_unsafe_with_nans(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<7>;
@@ -614,7 +614,7 @@ define <2 x half> @fma_f16x2_expanded_no_nans_multiple_uses_of_fma(<2 x half> %a
ret <2 x half> %6
}
-define <2 x half> @fma_f16x2_expanded_unsafe_with_nans(<2 x half> %a, <2 x half> %b, <2 x half> %c) #1 {
+define <2 x half> @fma_f16x2_expanded_unsafe_with_nans(<2 x half> %a, <2 x half> %b, <2 x half> %c) {
; CHECK-LABEL: fma_f16x2_expanded_unsafe_with_nans(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<7>;
@@ -720,7 +720,7 @@ define <2 x half> @fma_f16x2_expanded_maxnum_no_nans(<2 x half> %a, <2 x half> %
ret <2 x half> %3
}
-define <2 x bfloat> @fma_bf16x2_expanded_unsafe_with_nans(<2 x bfloat> %a, <2 x bfloat> %b, <2 x bfloat> %c) #1 {
+define <2 x bfloat> @fma_bf16x2_expanded_unsafe_with_nans(<2 x bfloat> %a, <2 x bfloat> %b, <2 x bfloat> %c) {
; CHECK-LABEL: fma_bf16x2_expanded_unsafe_with_nans(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<7>;
@@ -1126,5 +1126,4 @@ define <2 x bfloat> @fma_bf16x2_expanded_maxnum_no_nans(<2 x bfloat> %a, <2 x bf
ret <2 x bfloat> %3
}
-attributes #0 = { "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "unsafe-fp-math"="true" }
-attributes #1 = { "unsafe-fp-math"="true" }
+attributes #0 = { "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" }
diff --git a/llvm/test/CodeGen/PowerPC/fmf-propagation.ll b/llvm/test/CodeGen/PowerPC/fmf-propagation.ll
index 4e72a5ac5ede3..e71f59c79ce4d 100644
--- a/llvm/test/CodeGen/PowerPC/fmf-propagation.ll
+++ b/llvm/test/CodeGen/PowerPC/fmf-propagation.ll
@@ -2,8 +2,8 @@
; REQUIRES: asserts
; RUN: llc < %s -mtriple=powerpc64le -debug-only=isel -o /dev/null 2>&1 | FileCheck %s --check-prefix=FMFDEBUG
; RUN: llc < %s -mtriple=powerpc64le | FileCheck %s --check-prefix=FMF
-; RUN: llc < %s -mtriple=powerpc64le -debug-only=isel -o /dev/null 2>&1 -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck %s --check-prefix=GLOBALDEBUG
-; RUN: llc < %s -mtriple=powerpc64le -enable-unsafe-fp-math -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math | FileCheck %s --check-prefix=GLOBAL
+; RUN: llc < %s -mtriple=powerpc64le -debug-only=isel -o /dev/null 2>&1 -enable-unsafe-fp-math -fp-contract=fast -enable-no-nans-fp-math | FileCheck %s --check-prefix=GLOBALDEBUG
+; RUN: llc < %s -mtriple=powerpc64le -enable-unsafe-fp-math -fp-contract=fast -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math | FileCheck %s --check-prefix=GLOBAL
; Test FP transforms using instruction/node-level fast-math-flags.
; We're also checking debug output to verify that FMF is propagated to the newly created nodes.
diff --git a/llvm/test/CodeGen/PowerPC/vsx-fma-mutate-trivial-copy.ll b/llvm/test/CodeGen/PowerPC/vsx-fma-mutate-trivial-copy.ll
index 96aa58000f9b7..539b563691723 100644
--- a/llvm/test/CodeGen/PowerPC/vsx-fma-mutate-trivial-copy.ll
+++ b/llvm/test/CodeGen/PowerPC/vsx-fma-mutate-trivial-copy.ll
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -fp-contract=fast < %s | FileCheck %s
target datalayout = "E-m:e-i64:64-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
@@ -31,7 +31,7 @@ declare double @llvm.sqrt.f64(double) #1
declare signext i32 @p_col_helper(...) #2
-attributes #0 = { nounwind "no-infs-fp-math"="true" "no-nans-fp-math"="true" "target-cpu"="pwr7" "unsafe-fp-math"="true" }
+attributes #0 = { nounwind "no-infs-fp-math"="true" "no-nans-fp-math"="true" "target-cpu"="pwr7" }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind }
diff --git a/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll b/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll
index 2c7da100344b7..78df4f685f6e9 100644
--- a/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll
+++ b/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2,fma -stop-after=finalize-isel 2>&1 | FileCheck %s
+; RUN: llc -fp-contract=fast < %s -mtriple=x86_64-unknown-unknown -mattr=avx2,fma -stop-after=finalize-isel 2>&1 | FileCheck %s
declare float @llvm.sqrt.f32(float) #2
@@ -144,6 +144,6 @@ define float @rsqrt_daz(float %f) #1 {
ret float %div
}
-attributes #0 = { "unsafe-fp-math"="true" "reciprocal-estimates"="sqrt:2" "denormal-fp-math"="ieee,ieee" }
-attributes #1 = { "unsafe-fp-math"="true" "reciprocal-estimates"="sqrt:2" "denormal-fp-math"="ieee,preserve-sign" }
+attributes #0 = { "reciprocal-estimates"="sqrt:2" "denormal-fp-math"="ieee,ieee" }
+attributes #1 = { "reciprocal-estimates"="sqrt:2" "denormal-fp-math"="ieee,preserve-sign" }
attributes #2 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/X86/sqrt-fastmath.ll b/llvm/test/CodeGen/X86/sqrt-fastmath.ll
index 9f420bcede110..5cd604c62a166 100644
--- a/llvm/test/CodeGen/X86/sqrt-fastmath.ll
+++ b/llvm/test/CodeGen/X86/sqrt-fastmath.ll
@@ -183,7 +183,7 @@ define <4 x float> @sqrt_v4f32_check_denorms(<4 x float> %x) #3 {
ret <4 x float> %call
}
-define <4 x float> @sqrt_v4f32_check_denorms_ieee_ninf(<4 x float> %x) #3 {
+define <4 x float> @sqrt_v4f32_check_denorms_ieee_ninf(<4 x float> %x) #7 {
; SSE-LABEL: sqrt_v4f32_check_denorms_ieee_ninf:
; SSE: # %bb.0:
; SSE-NEXT: rsqrtps %xmm0, %xmm1
@@ -230,11 +230,11 @@ define <4 x float> @sqrt_v4f32_check_denorms_ieee_ninf(<4 x float> %x) #3 {
; AVX512-NEXT: vcmpleps %xmm0, %xmm2, %xmm0
; AVX512-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
- %call = tail call ninf afn <4 x float> @llvm.sqrt.v4f32(<4 x float> %x) #2
+ %call = tail call fast ninf afn <4 x float> @llvm.sqrt.v4f32(<4 x float> %x) #2
ret <4 x float> %call
}
-define <4 x float> @sqrt_v4f32_check_denorms_dynamic_ninf(<4 x float> %x) #6 {
+define <4 x float> @sqrt_v4f32_check_denorms_dynamic_ninf(<4 x float> %x) #8 {
; SSE-LABEL: sqrt_v4f32_check_denorms_dynamic_ninf:
; SSE: # %bb.0:
; SSE-NEXT: rsqrtps %xmm0, %xmm1
@@ -281,7 +281,7 @@ define <4 x float> @sqrt_v4f32_check_denorms_dynamic_ninf(<4 x float> %x) #6 {
; AVX512-NEXT: vcmpleps %xmm0, %xmm2, %xmm0
; AVX512-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
- %call = tail call ninf afn <4 x float> @llvm.sqrt.v4f32(<4 x float> %x) #2
+ %call = tail call fast ninf afn <4 x float> @llvm.sqrt.v4f32(<4 x float> %x) #2
ret <4 x float> %call
}
@@ -1019,3 +1019,8 @@ attributes #3 = { "unsafe-fp-math"="true" "reciprocal-estimates"="sqrt,vec-sqrt"
attributes #4 = { "unsafe-fp-math"="true" "reciprocal-estimates"="sqrt,vec-sqrt" "denormal-fp-math"="ieee,preserve-sign" }
attributes #5 = { "unsafe-fp-math"="true" "reciprocal-estimates"="all:0" }
attributes #6 = { "unsafe-fp-math"="true" "reciprocal-estimates"="sqrt,vec-sqrt" "denormal-fp-math"="preserve-sign,dynamic" }
+
+; Attributes without "unsafe-fp-math"="true"
+; TODO: Merge with previous attributes when this attribute can be deleted.
+attributes #7 = { "reciprocal-estimates"="sqrt,vec-sqrt" "denormal-fp-math"="preserve-sign,ieee" } ; #3
+attributes #8 = { "reciprocal-estimates"="sqrt,vec-sqrt" "denormal-fp-math"="preserve-sign,dynamic" } ; #6