[llvm] [DAGCombiner] Remove all `UnsafeFPMath` references (PR #146295)

Thu Aug 21 06:18:13 PDT 2025

https://github.com/paperchalice updated https://github.com/llvm/llvm-project/pull/146295

>From 8137b62b75ab7ff79ca56ffa0cdeaf34ef5f5f29 Mon Sep 17 00:00:00 2001
From: PaperChalice <liujunchang97 at outlook.com>
Date: Mon, 30 Jun 2025 09:14:24 +0800
Subject: [PATCH 1/9] remove `UnsafeFPMath` usages in `visitFADD`

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 17703f58f2824..c5891395908a8 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -17697,7 +17697,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
   // If 'unsafe math' or reassoc and nsz, fold lots of things.
   // TODO: break out portions of the transformations below for which Unsafe is
   //       considered and which do not require both nsz and reassoc
-  if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
+  if ((Options.NoSignedZerosFPMath ||
        (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
       AllowNewConst) {
     // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
@@ -17784,7 +17784,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
     }
   } // enable-unsafe-fp-math && AllowNewConst
 
-  if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
+  if ((Options.NoSignedZerosFPMath ||
        (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros()))) {
     // Fold fadd(vecreduce(x), vecreduce(y)) -> vecreduce(fadd(x, y))
     if (SDValue SD = reassociateReduction(ISD::VECREDUCE_FADD, ISD::FADD, DL,

>From 92da3bbc00ef2861a0fed9223e97bad143aa74e9 Mon Sep 17 00:00:00 2001
From: PaperChalice <liujunchang97 at outlook.com>
Date: Mon, 30 Jun 2025 09:30:51 +0800
Subject: [PATCH 2/9] Remove `UnsafeFPMath` in `visitFSUB`

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index c5891395908a8..aa2a3cadd69e7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -17887,7 +17887,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
     }
   }
 
-  if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
+  if ((Options.NoSignedZerosFPMath ||
        (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
       N1.getOpcode() == ISD::FADD) {
     // X - (X + Y) -> -Y

>From ec1866831d7760f09cba9696b0538f1f7fa89a69 Mon Sep 17 00:00:00 2001
From: PaperChalice <liujunchang97 at outlook.com>
Date: Mon, 30 Jun 2025 09:32:17 +0800
Subject: [PATCH 3/9] Remove `UnsafeFPMath` in `visitFMUL`

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  2 +-
 llvm/test/CodeGen/AMDGPU/llvm.sin.ll          | 36 +++++++++----------
 2 files changed, 18 insertions(+), 20 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index aa2a3cadd69e7..f6779dbb970b4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -18051,7 +18051,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
   if (SDValue NewSel = foldBinOpIntoSelect(N))
     return NewSel;
 
-  if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
+  if (Flags.hasAllowReassociation()) {
     // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
     if (DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
         N0.getOpcode() == ISD::FMUL) {
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.sin.ll b/llvm/test/CodeGen/AMDGPU/llvm.sin.ll
index 2366e39e2eedd..66b01a52da9a6 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.sin.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.sin.ll
@@ -16,7 +16,7 @@
 ; GFX9-NOT: v_fract_f32
 ; GCN: v_sin_f32
 ; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @sin_f32(ptr addrspace(1) %out, float %x) #1 {
+define amdgpu_kernel void @sin_f32(ptr addrspace(1) %out, float %x) {
    %sin = call float @llvm.sin.f32(float %x)
    store float %sin, ptr addrspace(1) %out
    ret void
@@ -29,7 +29,7 @@ define amdgpu_kernel void @sin_f32(ptr addrspace(1) %out, float %x) #1 {
 ; GFX9-NOT: v_fract_f32
 ; GCN: v_sin_f32
 ; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @safe_sin_3x_f32(ptr addrspace(1) %out, float %x) #1 {
+define amdgpu_kernel void @safe_sin_3x_f32(ptr addrspace(1) %out, float %x) {
   %y = fmul float 3.0, %x
   %sin = call float @llvm.sin.f32(float %y)
   store float %sin, ptr addrspace(1) %out
@@ -44,9 +44,9 @@ define amdgpu_kernel void @safe_sin_3x_f32(ptr addrspace(1) %out, float %x) #1 {
 ; GFX9-NOT: v_fract_f32
 ; GCN: v_sin_f32
 ; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @unsafe_sin_3x_f32(ptr addrspace(1) %out, float %x) #2 {
-  %y = fmul float 3.0, %x
-  %sin = call float @llvm.sin.f32(float %y)
+define amdgpu_kernel void @unsafe_sin_3x_f32(ptr addrspace(1) %out, float %x) {
+  %y = fmul reassoc float 3.0, %x
+  %sin = call reassoc float @llvm.sin.f32(float %y)
   store float %sin, ptr addrspace(1) %out
   ret void
 }
@@ -59,7 +59,7 @@ define amdgpu_kernel void @unsafe_sin_3x_f32(ptr addrspace(1) %out, float %x) #2
 ; GFX9-NOT: v_fract_f32
 ; GCN: v_sin_f32
 ; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @fmf_sin_3x_f32(ptr addrspace(1) %out, float %x) #1 {
+define amdgpu_kernel void @fmf_sin_3x_f32(ptr addrspace(1) %out, float %x) {
   %y = fmul reassoc float 3.0, %x
   %sin = call reassoc float @llvm.sin.f32(float %y)
   store float %sin, ptr addrspace(1) %out
@@ -73,7 +73,7 @@ define amdgpu_kernel void @fmf_sin_3x_f32(ptr addrspace(1) %out, float %x) #1 {
 ; GFX9-NOT: v_fract_f32
 ; GCN: v_sin_f32
 ; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @safe_sin_2x_f32(ptr addrspace(1) %out, float %x) #1 {
+define amdgpu_kernel void @safe_sin_2x_f32(ptr addrspace(1) %out, float %x) {
   %y = fmul float 2.0, %x
   %sin = call float @llvm.sin.f32(float %y)
   store float %sin, ptr addrspace(1) %out
@@ -88,9 +88,9 @@ define amdgpu_kernel void @safe_sin_2x_f32(ptr addrspace(1) %out, float %x) #1 {
 ; GFX9-NOT: v_fract_f32
 ; GCN: v_sin_f32
 ; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @unsafe_sin_2x_f32(ptr addrspace(1) %out, float %x) #2 {
-  %y = fmul float 2.0, %x
-  %sin = call float @llvm.sin.f32(float %y)
+define amdgpu_kernel void @unsafe_sin_2x_f32(ptr addrspace(1) %out, float %x) {
+  %y = fmul reassoc float 2.0, %x
+  %sin = call reassoc float @llvm.sin.f32(float %y)
   store float %sin, ptr addrspace(1) %out
   ret void
 }
@@ -103,7 +103,7 @@ define amdgpu_kernel void @unsafe_sin_2x_f32(ptr addrspace(1) %out, float %x) #2
 ; GFX9-NOT: v_fract_f32
 ; GCN: v_sin_f32
 ; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @fmf_sin_2x_f32(ptr addrspace(1) %out, float %x) #1 {
+define amdgpu_kernel void @fmf_sin_2x_f32(ptr addrspace(1) %out, float %x) {
   %y = fmul reassoc float 2.0, %x
   %sin = call reassoc float @llvm.sin.f32(float %y)
   store float %sin, ptr addrspace(1) %out
@@ -117,7 +117,7 @@ define amdgpu_kernel void @fmf_sin_2x_f32(ptr addrspace(1) %out, float %x) #1 {
 ; GFX9-NOT: v_fract_f32
 ; GCN: v_sin_f32
 ; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @safe_sin_cancel_f32(ptr addrspace(1) %out, float %x) #1 {
+define amdgpu_kernel void @safe_sin_cancel_f32(ptr addrspace(1) %out, float %x) {
   %y = fmul float 0x401921FB60000000, %x
   %sin = call float @llvm.sin.f32(float %y)
   store float %sin, ptr addrspace(1) %out
@@ -131,9 +131,9 @@ define amdgpu_kernel void @safe_sin_cancel_f32(ptr addrspace(1) %out, float %x)
 ; GFX9-NOT: v_fract_f32
 ; GCN: v_sin_f32
 ; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @unsafe_sin_cancel_f32(ptr addrspace(1) %out, float %x) #2 {
-  %y = fmul float 0x401921FB60000000, %x
-  %sin = call float @llvm.sin.f32(float %y)
+define amdgpu_kernel void @unsafe_sin_cancel_f32(ptr addrspace(1) %out, float %x) {
+  %y = fmul reassoc float 0x401921FB60000000, %x
+  %sin = call reassoc float @llvm.sin.f32(float %y)
   store float %sin, ptr addrspace(1) %out
   ret void
 }
@@ -145,7 +145,7 @@ define amdgpu_kernel void @unsafe_sin_cancel_f32(ptr addrspace(1) %out, float %x
 ; GFX9-NOT: v_fract_f32
 ; GCN: v_sin_f32
 ; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @fmf_sin_cancel_f32(ptr addrspace(1) %out, float %x) #1 {
+define amdgpu_kernel void @fmf_sin_cancel_f32(ptr addrspace(1) %out, float %x) {
   %y = fmul reassoc float 0x401921FB60000000, %x
   %sin = call reassoc float @llvm.sin.f32(float %y)
   store float %sin, ptr addrspace(1) %out
@@ -164,7 +164,7 @@ define amdgpu_kernel void @fmf_sin_cancel_f32(ptr addrspace(1) %out, float %x) #
 ; GCN: v_sin_f32
 ; GCN: v_sin_f32
 ; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @sin_v4f32(ptr addrspace(1) %out, <4 x float> %vx) #1 {
+define amdgpu_kernel void @sin_v4f32(ptr addrspace(1) %out, <4 x float> %vx) {
    %sin = call <4 x float> @llvm.sin.v4f32( <4 x float> %vx)
    store <4 x float> %sin, ptr addrspace(1) %out
    ret void
@@ -174,5 +174,3 @@ declare float @llvm.sin.f32(float) #0
 declare <4 x float> @llvm.sin.v4f32(<4 x float>) #0
 
 attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind "unsafe-fp-math"="false" }
-attributes #2 = { nounwind "unsafe-fp-math"="true" }

>From 5fa17be3c5bc65b1404a2f4ceb07f75150a4c395 Mon Sep 17 00:00:00 2001
From: PaperChalice <liujunchang97 at outlook.com>
Date: Mon, 30 Jun 2025 12:52:46 +0800
Subject: [PATCH 4/9] Remove `UnsafeFPMath` in `combineRepeatedFPDivisors`

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  5 +-
 llvm/test/CodeGen/AArch64/fdiv-combine.ll     | 79 +++++++++----------
 2 files changed, 41 insertions(+), 43 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index f6779dbb970b4..5b9f5b11079d8 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -18324,9 +18324,8 @@ SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
   // TODO: Limit this transform based on optsize/minsize - it always creates at
   //       least 1 extra instruction. But the perf win may be substantial enough
   //       that only minsize should restrict this.
-  bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
   const SDNodeFlags Flags = N->getFlags();
-  if (LegalDAG || (!UnsafeMath && !Flags.hasAllowReciprocal()))
+  if (LegalDAG || !Flags.hasAllowReciprocal())
     return SDValue();
 
   // Skip if current node is a reciprocal/fneg-reciprocal.
@@ -18363,7 +18362,7 @@ SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
 
       // This division is eligible for optimization only if global unsafe math
       // is enabled or if this division allows reciprocal formation.
-      if (UnsafeMath || U->getFlags().hasAllowReciprocal())
+      if (U->getFlags().hasAllowReciprocal())
         Users.insert(U);
     }
   }
diff --git a/llvm/test/CodeGen/AArch64/fdiv-combine.ll b/llvm/test/CodeGen/AArch64/fdiv-combine.ll
index d8f7f0a306684..91bb8ac714908 100644
--- a/llvm/test/CodeGen/AArch64/fdiv-combine.ll
+++ b/llvm/test/CodeGen/AArch64/fdiv-combine.ll
@@ -11,7 +11,7 @@
 ;   a / D; b / D; c / D;
 ;                =>
 ;   recip = 1.0 / D; a * recip; b * recip; c * recip;
-define void @three_fdiv_float(float %D, float %a, float %b, float %c) #0 {
+define void @three_fdiv_float(float %D, float %a, float %b, float %c) {
 ; CHECK-SD-LABEL: three_fdiv_float:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fmov s4, #1.00000000
@@ -28,14 +28,14 @@ define void @three_fdiv_float(float %D, float %a, float %b, float %c) #0 {
 ; CHECK-GI-NEXT:    fdiv s2, s3, s0
 ; CHECK-GI-NEXT:    fmov s0, s4
 ; CHECK-GI-NEXT:    b foo_3f
-  %div = fdiv float %a, %D
-  %div1 = fdiv float %b, %D
-  %div2 = fdiv float %c, %D
+  %div = fdiv arcp float %a, %D
+  %div1 = fdiv arcp float %b, %D
+  %div2 = fdiv arcp float %c, %D
   tail call void @foo_3f(float %div, float %div1, float %div2)
   ret void
 }
 
-define void @three_fdiv_double(double %D, double %a, double %b, double %c) #0 {
+define void @three_fdiv_double(double %D, double %a, double %b, double %c) {
 ; CHECK-SD-LABEL: three_fdiv_double:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fmov d4, #1.00000000
@@ -52,14 +52,14 @@ define void @three_fdiv_double(double %D, double %a, double %b, double %c) #0 {
 ; CHECK-GI-NEXT:    fdiv d2, d3, d0
 ; CHECK-GI-NEXT:    fmov d0, d4
 ; CHECK-GI-NEXT:    b foo_3d
-  %div = fdiv double %a, %D
-  %div1 = fdiv double %b, %D
-  %div2 = fdiv double %c, %D
+  %div = fdiv arcp double %a, %D
+  %div1 = fdiv arcp double %b, %D
+  %div2 = fdiv arcp double %c, %D
   tail call void @foo_3d(double %div, double %div1, double %div2)
   ret void
 }
 
-define void @three_fdiv_4xfloat(<4 x float> %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) #0 {
+define void @three_fdiv_4xfloat(<4 x float> %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) {
 ; CHECK-SD-LABEL: three_fdiv_4xfloat:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fmov v4.4s, #1.00000000
@@ -76,14 +76,14 @@ define void @three_fdiv_4xfloat(<4 x float> %D, <4 x float> %a, <4 x float> %b,
 ; CHECK-GI-NEXT:    fdiv v2.4s, v3.4s, v0.4s
 ; CHECK-GI-NEXT:    mov v0.16b, v4.16b
 ; CHECK-GI-NEXT:    b foo_3_4xf
-  %div = fdiv <4 x float> %a, %D
-  %div1 = fdiv <4 x float> %b, %D
-  %div2 = fdiv <4 x float> %c, %D
+  %div = fdiv arcp <4 x float> %a, %D
+  %div1 = fdiv arcp <4 x float> %b, %D
+  %div2 = fdiv arcp <4 x float> %c, %D
   tail call void @foo_3_4xf(<4 x float> %div, <4 x float> %div1, <4 x float> %div2)
   ret void
 }
 
-define void @three_fdiv_2xdouble(<2 x double> %D, <2 x double> %a, <2 x double> %b, <2 x double> %c) #0 {
+define void @three_fdiv_2xdouble(<2 x double> %D, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
 ; CHECK-SD-LABEL: three_fdiv_2xdouble:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fmov v4.2d, #1.00000000
@@ -100,42 +100,42 @@ define void @three_fdiv_2xdouble(<2 x double> %D, <2 x double> %a, <2 x double>
 ; CHECK-GI-NEXT:    fdiv v2.2d, v3.2d, v0.2d
 ; CHECK-GI-NEXT:    mov v0.16b, v4.16b
 ; CHECK-GI-NEXT:    b foo_3_2xd
-  %div = fdiv <2 x double> %a, %D
-  %div1 = fdiv <2 x double> %b, %D
-  %div2 = fdiv <2 x double> %c, %D
+  %div = fdiv arcp <2 x double> %a, %D
+  %div1 = fdiv arcp <2 x double> %b, %D
+  %div2 = fdiv arcp <2 x double> %c, %D
   tail call void @foo_3_2xd(<2 x double> %div, <2 x double> %div1, <2 x double> %div2)
   ret void
 }
 
 ; Following test cases check we never combine two FDIVs if neither of them
 ; calculates a reciprocal.
-define void @two_fdiv_float(float %D, float %a, float %b) #0 {
+define void @two_fdiv_float(float %D, float %a, float %b) {
 ; CHECK-LABEL: two_fdiv_float:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fdiv s3, s1, s0
 ; CHECK-NEXT:    fdiv s1, s2, s0
 ; CHECK-NEXT:    fmov s0, s3
 ; CHECK-NEXT:    b foo_2f
-  %div = fdiv float %a, %D
-  %div1 = fdiv float %b, %D
+  %div = fdiv arcp float %a, %D
+  %div1 = fdiv arcp float %b, %D
   tail call void @foo_2f(float %div, float %div1)
   ret void
 }
 
-define void @two_fdiv_double(double %D, double %a, double %b) #0 {
+define void @two_fdiv_double(double %D, double %a, double %b) {
 ; CHECK-LABEL: two_fdiv_double:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fdiv d3, d1, d0
 ; CHECK-NEXT:    fdiv d1, d2, d0
 ; CHECK-NEXT:    fmov d0, d3
 ; CHECK-NEXT:    b foo_2d
-  %div = fdiv double %a, %D
-  %div1 = fdiv double %b, %D
+  %div = fdiv arcp double %a, %D
+  %div1 = fdiv arcp double %b, %D
   tail call void @foo_2d(double %div, double %div1)
   ret void
 }
 
-define void @splat_three_fdiv_4xfloat(float %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) #0 {
+define void @splat_three_fdiv_4xfloat(float %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) {
 ; CHECK-SD-LABEL: splat_three_fdiv_4xfloat:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
@@ -157,14 +157,14 @@ define void @splat_three_fdiv_4xfloat(float %D, <4 x float> %a, <4 x float> %b,
 ; CHECK-GI-NEXT:    b foo_3_4xf
   %D.ins = insertelement <4 x float> poison, float %D, i64 0
   %splat = shufflevector <4 x float> %D.ins, <4 x float> poison, <4 x i32> zeroinitializer
-  %div = fdiv <4 x float> %a, %splat
-  %div1 = fdiv <4 x float> %b, %splat
-  %div2 = fdiv <4 x float> %c, %splat
+  %div = fdiv arcp <4 x float> %a, %splat
+  %div1 = fdiv arcp <4 x float> %b, %splat
+  %div2 = fdiv arcp <4 x float> %c, %splat
   tail call void @foo_3_4xf(<4 x float> %div, <4 x float> %div1, <4 x float> %div2)
   ret void
 }
 
-define <4 x float> @splat_fdiv_v4f32(float %D, <4 x float> %a) #1 {
+define <4 x float> @splat_fdiv_v4f32(float %D, <4 x float> %a) #0 {
 ; CHECK-SD-LABEL: splat_fdiv_v4f32:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
@@ -183,11 +183,11 @@ define <4 x float> @splat_fdiv_v4f32(float %D, <4 x float> %a) #1 {
 entry:
   %D.ins = insertelement <4 x float> poison, float %D, i64 0
   %splat = shufflevector <4 x float> %D.ins, <4 x float> poison, <4 x i32> zeroinitializer
-  %div = fdiv <4 x float> %a, %splat
+  %div = fdiv arcp <4 x float> %a, %splat
   ret <4 x float> %div
 }
 
-define <vscale x 4 x float> @splat_fdiv_nxv4f32(float %D, <vscale x 4 x float> %a) #1 {
+define <vscale x 4 x float> @splat_fdiv_nxv4f32(float %D, <vscale x 4 x float> %a) #0 {
 ; CHECK-LABEL: splat_fdiv_nxv4f32:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    fmov s2, #1.00000000
@@ -198,11 +198,11 @@ define <vscale x 4 x float> @splat_fdiv_nxv4f32(float %D, <vscale x 4 x float> %
 entry:
   %D.ins = insertelement <vscale x 4 x float> poison, float %D, i64 0
   %splat = shufflevector <vscale x 4 x float> %D.ins, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
-  %div = fdiv <vscale x 4 x float> %a, %splat
+  %div = fdiv arcp <vscale x 4 x float> %a, %splat
   ret <vscale x 4 x float> %div
 }
 
-define void @splat_three_fdiv_nxv4f32(float %D, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) #1 {
+define void @splat_three_fdiv_nxv4f32(float %D, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) #0 {
 ; CHECK-LABEL: splat_three_fdiv_nxv4f32:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    fmov s4, #1.00000000
@@ -215,14 +215,14 @@ define void @splat_three_fdiv_nxv4f32(float %D, <vscale x 4 x float> %a, <vscale
 entry:
   %D.ins = insertelement <vscale x 4 x float> poison, float %D, i64 0
   %splat = shufflevector <vscale x 4 x float> %D.ins, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
-  %div = fdiv <vscale x 4 x float> %a, %splat
-  %div1 = fdiv <vscale x 4 x float> %b, %splat
-  %div2 = fdiv <vscale x 4 x float> %c, %splat
+  %div = fdiv arcp <vscale x 4 x float> %a, %splat
+  %div1 = fdiv arcp <vscale x 4 x float> %b, %splat
+  %div2 = fdiv arcp <vscale x 4 x float> %c, %splat
   tail call void @foo_3_nxv4f32(<vscale x 4 x float> %div, <vscale x 4 x float> %div1, <vscale x 4 x float> %div2)
   ret void
 }
 
-define <vscale x 2 x double> @splat_fdiv_nxv2f64(double %D, <vscale x 2 x double> %a) #1 {
+define <vscale x 2 x double> @splat_fdiv_nxv2f64(double %D, <vscale x 2 x double> %a) #0 {
 ; CHECK-LABEL: splat_fdiv_nxv2f64:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
@@ -237,7 +237,7 @@ entry:
   ret <vscale x 2 x double> %div
 }
 
-define void @splat_two_fdiv_nxv2f64(double %D, <vscale x 2 x double> %a, <vscale x 2 x double> %b) #1 {
+define void @splat_two_fdiv_nxv2f64(double %D, <vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
 ; CHECK-LABEL: splat_two_fdiv_nxv2f64:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    fmov d3, #1.00000000
@@ -249,8 +249,8 @@ define void @splat_two_fdiv_nxv2f64(double %D, <vscale x 2 x double> %a, <vscale
 entry:
   %D.ins = insertelement <vscale x 2 x double> poison, double %D, i64 0
   %splat = shufflevector <vscale x 2 x double> %D.ins, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
-  %div = fdiv <vscale x 2 x double> %a, %splat
-  %div1 = fdiv <vscale x 2 x double> %b, %splat
+  %div = fdiv arcp <vscale x 2 x double> %a, %splat
+  %div1 = fdiv arcp <vscale x 2 x double> %b, %splat
   tail call void @foo_2_nxv2f64(<vscale x 2 x double> %div, <vscale x 2 x double> %div1)
   ret void
 }
@@ -264,5 +264,4 @@ declare void @foo_2d(double, double)
 declare void @foo_3_nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
 declare void @foo_2_nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
 
-attributes #0 = { "unsafe-fp-math"="true" }
-attributes #1 = { "unsafe-fp-math"="true" "target-features"="+sve" }
+attributes #0 = { "target-features"="+sve" }

>From f347f76fc98dfa26e9bf5f84110fa5c1cd7d9969 Mon Sep 17 00:00:00 2001
From: PaperChalice <liujunchang97 at outlook.com>
Date: Mon, 30 Jun 2025 13:45:53 +0800
Subject: [PATCH 5/9] Remove `UnsafeFPMath` in `visitFP_ROUND`

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  2 +-
 llvm/test/CodeGen/X86/fp-double-rounding.ll   | 15 ++++++++++++---
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 5b9f5b11079d8..60a5baadc361a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -18987,7 +18987,7 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
     // single-step fp_round we want to fold to.
     // In other words, double rounding isn't the same as rounding.
     // Also, this is a value preserving truncation iff both fp_round's are.
-    if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc)
+    if (N->getFlags().hasAllowContract() || N0IsTrunc)
       return DAG.getNode(
           ISD::FP_ROUND, DL, VT, N0.getOperand(0),
           DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL, /*isTarget=*/true));
diff --git a/llvm/test/CodeGen/X86/fp-double-rounding.ll b/llvm/test/CodeGen/X86/fp-double-rounding.ll
index 543908a10df29..957c0280f6653 100644
--- a/llvm/test/CodeGen/X86/fp-double-rounding.ll
+++ b/llvm/test/CodeGen/X86/fp-double-rounding.ll
@@ -4,16 +4,25 @@
 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64--"
 
-; CHECK-LABEL: double_rounding:
+; CHECK-LABEL: double_rounding_safe:
 ; SAFE: callq __trunctfdf2
 ; SAFE-NEXT: cvtsd2ss %xmm0
+define void @double_rounding_safe(ptr %x, ptr %f) {
+entry:
+  %0 = load fp128, ptr %x, align 16
+  %1 = fptrunc fp128 %0 to double
+  %2 = fptrunc double %1 to float
+  store float %2, ptr %f, align 4
+  ret void
+}
+; CHECK-LABEL: double_rounding:
 ; UNSAFE: callq __trunctfsf2
 ; UNSAFE-NOT: cvt
 define void @double_rounding(ptr %x, ptr %f) {
 entry:
   %0 = load fp128, ptr %x, align 16
-  %1 = fptrunc fp128 %0 to double
-  %2 = fptrunc double %1 to float
+  %1 = fptrunc contract fp128 %0 to double
+  %2 = fptrunc contract double %1 to float
   store float %2, ptr %f, align 4
   ret void
 }

>From fce35d5604c95fec13b8abccfbb3e88a2867e1d0 Mon Sep 17 00:00:00 2001
From: PaperChalice <liujunchang97 at outlook.com>
Date: Mon, 30 Jun 2025 13:46:45 +0800
Subject: [PATCH 6/9] Remove an unused variable in `visitFMUL`

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 60a5baadc361a..86f0f68d2841d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -18027,7 +18027,6 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
   ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
   EVT VT = N->getValueType(0);
   SDLoc DL(N);
-  const TargetOptions &Options = DAG.getTarget().Options;
   const SDNodeFlags Flags = N->getFlags();
   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
 

>From 9b05c22ffcc5fcfbd33d00cb8ad88ce13cfbd309 Mon Sep 17 00:00:00 2001
From: PaperChalice <liujunchang97 at outlook.com>
Date: Mon, 30 Jun 2025 13:47:44 +0800
Subject: [PATCH 7/9] Remove `UnsafeFPMath` in `visitFADDForFMACombine`

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  6 ++----
 llvm/test/CodeGen/ARM/fp-fast.ll              | 11 +++++------
 llvm/test/CodeGen/NVPTX/fma-assoc.ll          | 16 ++++++++--------
 llvm/test/CodeGen/X86/fma_patterns.ll         | 12 ++++++------
 llvm/test/CodeGen/X86/fma_patterns_wide.ll    | 12 ++++++------
 5 files changed, 27 insertions(+), 30 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 86f0f68d2841d..9df88865a32d9 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -17030,8 +17030,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
   // fadd (G, (fma A, B, (fma (C, D, (fmul (E, F)))))) -->
   // fma A, B, (fma C, D, fma (E, F, G)).
   // This requires reassociation because it changes the order of operations.
-  bool CanReassociate =
-      Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
+  bool CanReassociate = N->getFlags().hasAllowReassociation();
   if (CanReassociate) {
     SDValue FMA, E;
     if (isFusedOp(N0) && N0.hasOneUse()) {
@@ -18227,8 +18226,7 @@ template <class MatchContextClass> SDValue DAGCombiner::visitFMA(SDNode *N) {
      !DAG.isConstantFPBuildVectorOrConstantFP(N1))
     return matcher.getNode(ISD::FMA, DL, VT, N1, N0, N2);
 
-  bool CanReassociate =
-      Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
+  bool CanReassociate = N->getFlags().hasAllowReassociation();
   if (CanReassociate) {
     // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
     if (matcher.match(N2, ISD::FMUL) && N0 == N2.getOperand(0) &&
diff --git a/llvm/test/CodeGen/ARM/fp-fast.ll b/llvm/test/CodeGen/ARM/fp-fast.ll
index 7d95a5efe9052..6e1c783bfbe3e 100644
--- a/llvm/test/CodeGen/ARM/fp-fast.ll
+++ b/llvm/test/CodeGen/ARM/fp-fast.ll
@@ -1,5 +1,4 @@
-; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a9 -mattr=+vfp4 -enable-unsafe-fp-math %s -o - \
-; RUN:  | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a9 -mattr=+vfp4 %s -o - | FileCheck %s
 
 ; CHECK: test1
 define float @test1(float %x) {
@@ -7,7 +6,7 @@ define float @test1(float %x) {
 ; CHECK: vmul.f32
 ; CHECK-NOT: vfma
   %t1 = fmul float %x, 3.0
-  %t2 = call float @llvm.fma.f32(float %x, float 2.0, float %t1)
+  %t2 = call reassoc float @llvm.fma.f32(float %x, float 2.0, float %t1)
   ret float %t2
 }
 
@@ -17,7 +16,7 @@ define float @test2(float %x, float %y) {
 ; CHECK: vfma.f32
 ; CHECK-NOT: vmul
   %t1 = fmul float %x, 3.0
-  %t2 = call float @llvm.fma.f32(float %t1, float 2.0, float %y)
+  %t2 = call reassoc float @llvm.fma.f32(float %t1, float 2.0, float %y)
   ret float %t2
 }
 
@@ -44,7 +43,7 @@ define float @test5(float %x) {
 ; CHECK-NOT: vfma
 ; CHECK: vmul.f32
 ; CHECK-NOT: vfma
-  %t2 = call float @llvm.fma.f32(float %x, float 2.0, float %x)
+  %t2 = call reassoc float @llvm.fma.f32(float %x, float 2.0, float %x)
   ret float %t2
 }
 
@@ -54,7 +53,7 @@ define float @test6(float %x) {
 ; CHECK: vmul.f32
 ; CHECK-NOT: vfma
   %t1 = fsub float -0.0, %x
-  %t2 = call float @llvm.fma.f32(float %x, float 5.0, float %t1)
+  %t2 = call reassoc float @llvm.fma.f32(float %x, float 5.0, float %t1)
   ret float %t2
 }
 
diff --git a/llvm/test/CodeGen/NVPTX/fma-assoc.ll b/llvm/test/CodeGen/NVPTX/fma-assoc.ll
index 1034c3eed3dc0..6693c9044ca2c 100644
--- a/llvm/test/CodeGen/NVPTX/fma-assoc.ll
+++ b/llvm/test/CodeGen/NVPTX/fma-assoc.ll
@@ -20,10 +20,10 @@ define ptx_device float @t1_f32(float %x, float %y, float %z,
 ; CHECK-UNSAFE-NEXT:    st.param.b32 [func_retval0], %r7;
 ; CHECK-UNSAFE-NEXT:    ret;
                                 float %u, float %v) {
-  %a = fmul float %x, %y
-  %b = fmul float %u, %v
-  %c = fadd float %a, %b
-  %d = fadd float %c, %z
+  %a = fmul reassoc float %x, %y
+  %b = fmul reassoc float %u, %v
+  %c = fadd reassoc float %a, %b
+  %d = fadd reassoc float %c, %z
   ret float %d
 }
 
@@ -43,10 +43,10 @@ define ptx_device double @t1_f64(double %x, double %y, double %z,
 ; CHECK-UNSAFE-NEXT:    st.param.b64 [func_retval0], %rd7;
 ; CHECK-UNSAFE-NEXT:    ret;
                                  double %u, double %v) {
-  %a = fmul double %x, %y
-  %b = fmul double %u, %v
-  %c = fadd double %a, %b
-  %d = fadd double %c, %z
+  %a = fmul reassoc double %x, %y
+  %b = fmul reassoc double %u, %v
+  %c = fadd reassoc double %a, %b
+  %d = fadd reassoc double %c, %z
   ret double %d
 }
 
diff --git a/llvm/test/CodeGen/X86/fma_patterns.ll b/llvm/test/CodeGen/X86/fma_patterns.ll
index 0ffcb8c46cef9..acdf783f61388 100644
--- a/llvm/test/CodeGen/X86/fma_patterns.ll
+++ b/llvm/test/CodeGen/X86/fma_patterns.ll
@@ -1672,9 +1672,9 @@ define <4 x float> @test_v4f32_fma_x_c1_fmul_x_c2(<4 x float> %x) #0 {
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
 ; AVX512-NEXT:    retq
-  %m0 = fmul <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>
-  %m1 = fmul <4 x float> %x, <float 4.0, float 3.0, float 2.0, float 1.0>
-  %a  = fadd <4 x float> %m0, %m1
+  %m0 = fmul contract reassoc <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>
+  %m1 = fmul contract reassoc <4 x float> %x, <float 4.0, float 3.0, float 2.0, float 1.0>
+  %a  = fadd contract reassoc <4 x float> %m0, %m1
   ret <4 x float> %a
 }
 
@@ -1697,9 +1697,9 @@ define <4 x float> @test_v4f32_fma_fmul_x_c1_c2_y(<4 x float> %x, <4 x float> %y
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1
 ; AVX512-NEXT:    retq
-  %m0 = fmul <4 x float> %x,  <float 1.0, float 2.0, float 3.0, float 4.0>
-  %m1 = fmul <4 x float> %m0, <float 4.0, float 3.0, float 2.0, float 1.0>
-  %a  = fadd <4 x float> %m1, %y
+  %m0 = fmul contract reassoc <4 x float> %x,  <float 1.0, float 2.0, float 3.0, float 4.0>
+  %m1 = fmul contract reassoc <4 x float> %m0, <float 4.0, float 3.0, float 2.0, float 1.0>
+  %a  = fadd contract reassoc <4 x float> %m1, %y
   ret <4 x float> %a
 }
 
diff --git a/llvm/test/CodeGen/X86/fma_patterns_wide.ll b/llvm/test/CodeGen/X86/fma_patterns_wide.ll
index fe5ddca67470c..d910110467ee0 100644
--- a/llvm/test/CodeGen/X86/fma_patterns_wide.ll
+++ b/llvm/test/CodeGen/X86/fma_patterns_wide.ll
@@ -1053,9 +1053,9 @@ define <16 x float> @test_v16f32_fma_x_c1_fmul_x_c2(<16 x float> %x) #0 {
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
 ; AVX512-NEXT:    retq
-  %m0 = fmul <16 x float> %x, <float 17.0, float 16.0, float 15.0, float 14.0, float 13.0, float 12.0, float 11.0, float 10.0, float 9.0, float 8.0, float 7.0, float 6.0, float 5.0, float 4.0, float 3.0, float 2.0>
-  %m1 = fmul <16 x float> %x, <float 16.0, float 15.0, float 14.0, float 13.0, float 12.0, float 11.0, float 10.0, float 9.0, float 8.0, float 7.0, float 6.0, float 5.0, float 4.0, float 3.0, float 2.0, float 1.0>
-  %a  = fadd <16 x float> %m0, %m1
+  %m0 = fmul contract reassoc <16 x float> %x, <float 17.0, float 16.0, float 15.0, float 14.0, float 13.0, float 12.0, float 11.0, float 10.0, float 9.0, float 8.0, float 7.0, float 6.0, float 5.0, float 4.0, float 3.0, float 2.0>
+  %m1 = fmul contract reassoc <16 x float> %x, <float 16.0, float 15.0, float 14.0, float 13.0, float 12.0, float 11.0, float 10.0, float 9.0, float 8.0, float 7.0, float 6.0, float 5.0, float 4.0, float 3.0, float 2.0, float 1.0>
+  %a  = fadd contract reassoc <16 x float> %m0, %m1
   ret <16 x float> %a
 }
 
@@ -1080,9 +1080,9 @@ define <16 x float> @test_v16f32_fma_fmul_x_c1_c2_y(<16 x float> %x, <16 x float
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vfmadd132ps {{.*#+}} zmm0 = (zmm0 * mem) + zmm1
 ; AVX512-NEXT:    retq
-  %m0 = fmul <16 x float> %x,  <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0>
-  %m1 = fmul <16 x float> %m0, <float 16.0, float 15.0, float 14.0, float 13.0, float 12.0, float 11.0, float 10.0, float 9.0, float 8.0, float 7.0, float 6.0, float 5.0, float 4.0, float 3.0, float 2.0, float 1.0>
-  %a  = fadd <16 x float> %m1, %y
+  %m0 = fmul contract reassoc <16 x float> %x,  <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0>
+  %m1 = fmul contract reassoc <16 x float> %m0, <float 16.0, float 15.0, float 14.0, float 13.0, float 12.0, float 11.0, float 10.0, float 9.0, float 8.0, float 7.0, float 6.0, float 5.0, float 4.0, float 3.0, float 2.0, float 1.0>
+  %a  = fadd contract reassoc <16 x float> %m1, %y
   ret <16 x float> %a
 }
 

>From 358d3d5b55d3524eb9a91d56ebf6228f5022513b Mon Sep 17 00:00:00 2001
From: PaperChalice <liujunchang97 at outlook.com>
Date: Thu, 21 Aug 2025 21:14:16 +0800
Subject: [PATCH 8/9] Revert "Remove `UnsafeFPMath` in `visitFP_ROUND`"

This reverts commit f347f76fc98dfa26e9bf5f84110fa5c1cd7d9969.
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  2 +-
 llvm/test/CodeGen/X86/fp-double-rounding.ll   | 15 +++------------
 2 files changed, 4 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 9df88865a32d9..e0dee8f3b57fd 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -18984,7 +18984,7 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
     // single-step fp_round we want to fold to.
     // In other words, double rounding isn't the same as rounding.
     // Also, this is a value preserving truncation iff both fp_round's are.
-    if (N->getFlags().hasAllowContract() || N0IsTrunc)
+    if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc)
       return DAG.getNode(
           ISD::FP_ROUND, DL, VT, N0.getOperand(0),
           DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL, /*isTarget=*/true));
diff --git a/llvm/test/CodeGen/X86/fp-double-rounding.ll b/llvm/test/CodeGen/X86/fp-double-rounding.ll
index 957c0280f6653..543908a10df29 100644
--- a/llvm/test/CodeGen/X86/fp-double-rounding.ll
+++ b/llvm/test/CodeGen/X86/fp-double-rounding.ll
@@ -4,25 +4,16 @@
 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64--"
 
-; CHECK-LABEL: double_rounding_safe:
+; CHECK-LABEL: double_rounding:
 ; SAFE: callq __trunctfdf2
 ; SAFE-NEXT: cvtsd2ss %xmm0
-define void @double_rounding_safe(ptr %x, ptr %f) {
-entry:
-  %0 = load fp128, ptr %x, align 16
-  %1 = fptrunc fp128 %0 to double
-  %2 = fptrunc double %1 to float
-  store float %2, ptr %f, align 4
-  ret void
-}
-; CHECK-LABEL: double_rounding:
 ; UNSAFE: callq __trunctfsf2
 ; UNSAFE-NOT: cvt
 define void @double_rounding(ptr %x, ptr %f) {
 entry:
   %0 = load fp128, ptr %x, align 16
-  %1 = fptrunc contract fp128 %0 to double
-  %2 = fptrunc contract double %1 to float
+  %1 = fptrunc fp128 %0 to double
+  %2 = fptrunc double %1 to float
   store float %2, ptr %f, align 4
   ret void
 }

>From 3b75df0b44a1447d94eff177757bdcf42070f801 Mon Sep 17 00:00:00 2001
From: PaperChalice <liujunchang97 at outlook.com>
Date: Thu, 21 Aug 2025 21:17:42 +0800
Subject: [PATCH 9/9] remove unsafe-fp-math in fma_patterns.ll

---
 llvm/test/CodeGen/X86/fma_patterns.ll | 60 +++++++++++++--------------
 1 file changed, 29 insertions(+), 31 deletions(-)

diff --git a/llvm/test/CodeGen/X86/fma_patterns.ll b/llvm/test/CodeGen/X86/fma_patterns.ll
index acdf783f61388..dc35c8f8dc657 100644
--- a/llvm/test/CodeGen/X86/fma_patterns.ll
+++ b/llvm/test/CodeGen/X86/fma_patterns.ll
@@ -1567,7 +1567,7 @@ define <4 x double> @test_v4f64_interp(<4 x double> %x, <4 x double> %y, <4 x do
 ; Pattern: (fneg (fma x, y, z)) -> (fma x, -y, -z)
 ;
 
-define <4 x float> @test_v4f32_fneg_fmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
+define <4 x float> @test_v4f32_fneg_fmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
 ; FMA-LABEL: test_v4f32_fneg_fmadd:
 ; FMA:       # %bb.0:
 ; FMA-NEXT:    vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
@@ -1582,13 +1582,13 @@ define <4 x float> @test_v4f32_fneg_fmadd(<4 x float> %a0, <4 x float> %a1, <4 x
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
 ; AVX512-NEXT:    retq
-  %mul = fmul nsz <4 x float> %a0, %a1
-  %add = fadd nsz <4 x float> %mul, %a2
-  %neg = fsub nsz <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %add
+  %mul = fmul contract nsz <4 x float> %a0, %a1
+  %add = fadd contract nsz <4 x float> %mul, %a2
+  %neg = fsub contract nsz <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %add
   ret <4 x float> %neg
 }
 
-define <4 x double> @test_v4f64_fneg_fmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
+define <4 x double> @test_v4f64_fneg_fmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
 ; FMA-LABEL: test_v4f64_fneg_fmsub:
 ; FMA:       # %bb.0:
 ; FMA-NEXT:    vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2
@@ -1609,7 +1609,7 @@ define <4 x double> @test_v4f64_fneg_fmsub(<4 x double> %a0, <4 x double> %a1, <
   ret <4 x double> %neg
 }
 
-define <4 x float> @test_v4f32_fneg_fnmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
+define <4 x float> @test_v4f32_fneg_fnmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
 ; FMA-LABEL: test_v4f32_fneg_fnmadd:
 ; FMA:       # %bb.0:
 ; FMA-NEXT:    vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
@@ -1624,14 +1624,14 @@ define <4 x float> @test_v4f32_fneg_fnmadd(<4 x float> %a0, <4 x float> %a1, <4
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
 ; AVX512-NEXT:    retq
-  %mul = fmul nsz <4 x float> %a0, %a1
-  %neg0 = fsub nsz <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %mul
-  %add = fadd nsz <4 x float> %neg0, %a2
-  %neg1 = fsub nsz <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %add
+  %mul = fmul contract nsz <4 x float> %a0, %a1
+  %neg0 = fsub contract nsz <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %mul
+  %add = fadd contract nsz <4 x float> %neg0, %a2
+  %neg1 = fsub contract nsz <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %add
   ret <4 x float> %neg1
 }
 
-define <4 x double> @test_v4f64_fneg_fnmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
+define <4 x double> @test_v4f64_fneg_fnmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
 ; FMA-LABEL: test_v4f64_fneg_fnmsub:
 ; FMA:       # %bb.0:
 ; FMA-NEXT:    vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
@@ -1646,10 +1646,10 @@ define <4 x double> @test_v4f64_fneg_fnmsub(<4 x double> %a0, <4 x double> %a1,
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
 ; AVX512-NEXT:    retq
-  %mul = fmul nsz  <4 x double> %a0, %a1
-  %neg0 = fsub nsz <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %mul
-  %sub = fsub nsz <4 x double> %neg0, %a2
-  %neg1 = fsub nsz <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %sub
+  %mul = fmul contract nsz  <4 x double> %a0, %a1
+  %neg0 = fsub contract nsz <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %mul
+  %sub = fsub contract nsz <4 x double> %neg0, %a2
+  %neg1 = fsub contract nsz <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %sub
   ret <4 x double> %neg1
 }
 
@@ -1657,7 +1657,7 @@ define <4 x double> @test_v4f64_fneg_fnmsub(<4 x double> %a0, <4 x double> %a1,
 ; Pattern: (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
 ;
 
-define <4 x float> @test_v4f32_fma_x_c1_fmul_x_c2(<4 x float> %x) #0 {
+define <4 x float> @test_v4f32_fma_x_c1_fmul_x_c2(<4 x float> %x) {
 ; FMA-LABEL: test_v4f32_fma_x_c1_fmul_x_c2:
 ; FMA:       # %bb.0:
 ; FMA-NEXT:    vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
@@ -1682,7 +1682,7 @@ define <4 x float> @test_v4f32_fma_x_c1_fmul_x_c2(<4 x float> %x) #0 {
 ; Pattern: (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
 ;
 
-define <4 x float> @test_v4f32_fma_fmul_x_c1_c2_y(<4 x float> %x, <4 x float> %y) #0 {
+define <4 x float> @test_v4f32_fma_fmul_x_c1_c2_y(<4 x float> %x, <4 x float> %y) {
 ; FMA-LABEL: test_v4f32_fma_fmul_x_c1_c2_y:
 ; FMA:       # %bb.0:
 ; FMA-NEXT:    vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1
@@ -1705,7 +1705,7 @@ define <4 x float> @test_v4f32_fma_fmul_x_c1_c2_y(<4 x float> %x, <4 x float> %y
 
 ; Pattern: (fneg (fmul x, y)) -> (fnmsub x, y, 0)
 
-define double @test_f64_fneg_fmul(double %x, double %y) #0 {
+define double @test_f64_fneg_fmul(double %x, double %y) {
 ; FMA-LABEL: test_f64_fneg_fmul:
 ; FMA:       # %bb.0:
 ; FMA-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
@@ -1723,12 +1723,12 @@ define double @test_f64_fneg_fmul(double %x, double %y) #0 {
 ; AVX512-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
 ; AVX512-NEXT:    vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
 ; AVX512-NEXT:    retq
-  %m = fmul nsz double %x, %y
-  %n = fsub double -0.0, %m
+  %m = fmul contract nsz double %x, %y
+  %n = fsub contract double -0.0, %m
   ret double %n
 }
 
-define <4 x float> @test_v4f32_fneg_fmul(<4 x float> %x, <4 x float> %y) #0 {
+define <4 x float> @test_v4f32_fneg_fmul(<4 x float> %x, <4 x float> %y) {
 ; FMA-LABEL: test_v4f32_fneg_fmul:
 ; FMA:       # %bb.0:
 ; FMA-NEXT:    vxorps %xmm2, %xmm2, %xmm2
@@ -1746,12 +1746,12 @@ define <4 x float> @test_v4f32_fneg_fmul(<4 x float> %x, <4 x float> %y) #0 {
 ; AVX512-NEXT:    vxorps %xmm2, %xmm2, %xmm2
 ; AVX512-NEXT:    vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
 ; AVX512-NEXT:    retq
-  %m = fmul nsz <4 x float> %x, %y
-  %n = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %m
+  %m = fmul contract nsz <4 x float> %x, %y
+  %n = fsub contract <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %m
   ret <4 x float> %n
 }
 
-define <4 x double> @test_v4f64_fneg_fmul(<4 x double> %x, <4 x double> %y) #0 {
+define <4 x double> @test_v4f64_fneg_fmul(<4 x double> %x, <4 x double> %y) {
 ; FMA-LABEL: test_v4f64_fneg_fmul:
 ; FMA:       # %bb.0:
 ; FMA-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
@@ -1769,12 +1769,12 @@ define <4 x double> @test_v4f64_fneg_fmul(<4 x double> %x, <4 x double> %y) #0 {
 ; AVX512-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
 ; AVX512-NEXT:    vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2
 ; AVX512-NEXT:    retq
-  %m = fmul nsz <4 x double> %x, %y
-  %n = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %m
+  %m = fmul contract nsz <4 x double> %x, %y
+  %n = fsub contract <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %m
   ret <4 x double> %n
 }
 
-define <4 x double> @test_v4f64_fneg_fmul_no_nsz(<4 x double> %x, <4 x double> %y) #0 {
+define <4 x double> @test_v4f64_fneg_fmul_no_nsz(<4 x double> %x, <4 x double> %y) {
 ; FMA-LABEL: test_v4f64_fneg_fmul_no_nsz:
 ; FMA:       # %bb.0:
 ; FMA-NEXT:    vmulpd %ymm1, %ymm0, %ymm0
@@ -1792,8 +1792,8 @@ define <4 x double> @test_v4f64_fneg_fmul_no_nsz(<4 x double> %x, <4 x double> %
 ; AVX512-NEXT:    vmulpd %ymm1, %ymm0, %ymm0
 ; AVX512-NEXT:    vxorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0
 ; AVX512-NEXT:    retq
-  %m = fmul <4 x double> %x, %y
-  %n = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %m
+  %m = fmul contract <4 x double> %x, %y
+  %n = fsub contract <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %m
   ret <4 x double> %n
 }
 
@@ -2022,5 +2022,3 @@ define float @fadd_fma_fmul_extra_use_3(float %a, float %b, float %c, float %d,
   %a2 = fadd fast float %n0, %a1
   ret float %a2
 }
-
-attributes #0 = { "unsafe-fp-math"="true" }