[llvm] Draft: [DAGCombiner] Remove all `UnsafeFPMath` references (PR #146295)

Sun Jun 29 23:18:53 PDT 2025

https://github.com/paperchalice updated https://github.com/llvm/llvm-project/pull/146295

>From 5dda9d969699ed03538cdc65a56b3e4942aa9a5e Mon Sep 17 00:00:00 2001
From: PaperChalice <liujunchang97 at outlook.com>
Date: Mon, 30 Jun 2025 09:14:24 +0800
Subject: [PATCH 1/8] Remove `UnsafeFPMath` usages in `visitFADD`

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 8554db0a1220c..b8e1da4749540 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -17581,7 +17581,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
   // If 'unsafe math' or reassoc and nsz, fold lots of things.
   // TODO: break out portions of the transformations below for which Unsafe is
   //       considered and which do not require both nsz and reassoc
-  if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
+  if ((Options.NoSignedZerosFPMath ||
        (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
       AllowNewConst) {
     // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
@@ -17668,7 +17668,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
     }
   } // enable-unsafe-fp-math && AllowNewConst
 
-  if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
+  if ((Options.NoSignedZerosFPMath ||
        (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros()))) {
     // Fold fadd(vecreduce(x), vecreduce(y)) -> vecreduce(fadd(x, y))
     if (SDValue SD = reassociateReduction(ISD::VECREDUCE_FADD, ISD::FADD, DL,

>From 69ac7066ae492afb97c528669007189522f079ce Mon Sep 17 00:00:00 2001
From: PaperChalice <liujunchang97 at outlook.com>
Date: Mon, 30 Jun 2025 09:30:51 +0800
Subject: [PATCH 2/8] Remove `UnsafeFPMath` in `visitFSUB`

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index b8e1da4749540..ebc553a57440e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -17771,7 +17771,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
     }
   }
 
-  if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
+  if ((Options.NoSignedZerosFPMath ||
        (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
       N1.getOpcode() == ISD::FADD) {
     // X - (X + Y) -> -Y

>From 4a9bbb48a49735c554b30f0207730fbe380329b1 Mon Sep 17 00:00:00 2001
From: PaperChalice <liujunchang97 at outlook.com>
Date: Mon, 30 Jun 2025 09:32:17 +0800
Subject: [PATCH 3/8] Remove `UnsafeFPMath` in `visitFMUL`

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  2 +-
 llvm/test/CodeGen/AMDGPU/llvm.sin.ll          | 36 +++++++++----------
 2 files changed, 18 insertions(+), 20 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index ebc553a57440e..5d69f55e0d1df 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -17935,7 +17935,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
   if (SDValue NewSel = foldBinOpIntoSelect(N))
     return NewSel;
 
-  if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
+  if (Flags.hasAllowReassociation()) {
     // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
     if (DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
         N0.getOpcode() == ISD::FMUL) {
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.sin.ll b/llvm/test/CodeGen/AMDGPU/llvm.sin.ll
index 576ed270183f6..58ebf2aafa5a8 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.sin.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.sin.ll
@@ -16,7 +16,7 @@
 ; GFX9-NOT: v_fract_f32
 ; GCN: v_sin_f32
 ; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @sin_f32(ptr addrspace(1) %out, float %x) #1 {
+define amdgpu_kernel void @sin_f32(ptr addrspace(1) %out, float %x) {
    %sin = call float @llvm.sin.f32(float %x)
    store float %sin, ptr addrspace(1) %out
    ret void
@@ -29,7 +29,7 @@ define amdgpu_kernel void @sin_f32(ptr addrspace(1) %out, float %x) #1 {
 ; GFX9-NOT: v_fract_f32
 ; GCN: v_sin_f32
 ; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @safe_sin_3x_f32(ptr addrspace(1) %out, float %x) #1 {
+define amdgpu_kernel void @safe_sin_3x_f32(ptr addrspace(1) %out, float %x) {
   %y = fmul float 3.0, %x
   %sin = call float @llvm.sin.f32(float %y)
   store float %sin, ptr addrspace(1) %out
@@ -44,9 +44,9 @@ define amdgpu_kernel void @safe_sin_3x_f32(ptr addrspace(1) %out, float %x) #1 {
 ; GFX9-NOT: v_fract_f32
 ; GCN: v_sin_f32
 ; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @unsafe_sin_3x_f32(ptr addrspace(1) %out, float %x) #2 {
-  %y = fmul float 3.0, %x
-  %sin = call float @llvm.sin.f32(float %y)
+define amdgpu_kernel void @unsafe_sin_3x_f32(ptr addrspace(1) %out, float %x) {
+  %y = fmul reassoc float 3.0, %x
+  %sin = call reassoc float @llvm.sin.f32(float %y)
   store float %sin, ptr addrspace(1) %out
   ret void
 }
@@ -59,7 +59,7 @@ define amdgpu_kernel void @unsafe_sin_3x_f32(ptr addrspace(1) %out, float %x) #2
 ; GFX9-NOT: v_fract_f32
 ; GCN: v_sin_f32
 ; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @fmf_sin_3x_f32(ptr addrspace(1) %out, float %x) #1 {
+define amdgpu_kernel void @fmf_sin_3x_f32(ptr addrspace(1) %out, float %x) {
   %y = fmul reassoc float 3.0, %x
   %sin = call reassoc float @llvm.sin.f32(float %y)
   store float %sin, ptr addrspace(1) %out
@@ -73,7 +73,7 @@ define amdgpu_kernel void @fmf_sin_3x_f32(ptr addrspace(1) %out, float %x) #1 {
 ; GFX9-NOT: v_fract_f32
 ; GCN: v_sin_f32
 ; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @safe_sin_2x_f32(ptr addrspace(1) %out, float %x) #1 {
+define amdgpu_kernel void @safe_sin_2x_f32(ptr addrspace(1) %out, float %x) {
   %y = fmul float 2.0, %x
   %sin = call float @llvm.sin.f32(float %y)
   store float %sin, ptr addrspace(1) %out
@@ -88,9 +88,9 @@ define amdgpu_kernel void @safe_sin_2x_f32(ptr addrspace(1) %out, float %x) #1 {
 ; GFX9-NOT: v_fract_f32
 ; GCN: v_sin_f32
 ; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @unsafe_sin_2x_f32(ptr addrspace(1) %out, float %x) #2 {
-  %y = fmul float 2.0, %x
-  %sin = call float @llvm.sin.f32(float %y)
+define amdgpu_kernel void @unsafe_sin_2x_f32(ptr addrspace(1) %out, float %x) {
+  %y = fmul reassoc float 2.0, %x
+  %sin = call reassoc float @llvm.sin.f32(float %y)
   store float %sin, ptr addrspace(1) %out
   ret void
 }
@@ -103,7 +103,7 @@ define amdgpu_kernel void @unsafe_sin_2x_f32(ptr addrspace(1) %out, float %x) #2
 ; GFX9-NOT: v_fract_f32
 ; GCN: v_sin_f32
 ; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @fmf_sin_2x_f32(ptr addrspace(1) %out, float %x) #1 {
+define amdgpu_kernel void @fmf_sin_2x_f32(ptr addrspace(1) %out, float %x) {
   %y = fmul reassoc float 2.0, %x
   %sin = call reassoc float @llvm.sin.f32(float %y)
   store float %sin, ptr addrspace(1) %out
@@ -117,7 +117,7 @@ define amdgpu_kernel void @fmf_sin_2x_f32(ptr addrspace(1) %out, float %x) #1 {
 ; GFX9-NOT: v_fract_f32
 ; GCN: v_sin_f32
 ; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @safe_sin_cancel_f32(ptr addrspace(1) %out, float %x) #1 {
+define amdgpu_kernel void @safe_sin_cancel_f32(ptr addrspace(1) %out, float %x) {
   %y = fmul float 0x401921FB60000000, %x
   %sin = call float @llvm.sin.f32(float %y)
   store float %sin, ptr addrspace(1) %out
@@ -131,9 +131,9 @@ define amdgpu_kernel void @safe_sin_cancel_f32(ptr addrspace(1) %out, float %x)
 ; GFX9-NOT: v_fract_f32
 ; GCN: v_sin_f32
 ; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @unsafe_sin_cancel_f32(ptr addrspace(1) %out, float %x) #2 {
-  %y = fmul float 0x401921FB60000000, %x
-  %sin = call float @llvm.sin.f32(float %y)
+define amdgpu_kernel void @unsafe_sin_cancel_f32(ptr addrspace(1) %out, float %x) {
+  %y = fmul reassoc float 0x401921FB60000000, %x
+  %sin = call reassoc float @llvm.sin.f32(float %y)
   store float %sin, ptr addrspace(1) %out
   ret void
 }
@@ -145,7 +145,7 @@ define amdgpu_kernel void @unsafe_sin_cancel_f32(ptr addrspace(1) %out, float %x
 ; GFX9-NOT: v_fract_f32
 ; GCN: v_sin_f32
 ; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @fmf_sin_cancel_f32(ptr addrspace(1) %out, float %x) #1 {
+define amdgpu_kernel void @fmf_sin_cancel_f32(ptr addrspace(1) %out, float %x) {
   %y = fmul reassoc float 0x401921FB60000000, %x
   %sin = call reassoc float @llvm.sin.f32(float %y)
   store float %sin, ptr addrspace(1) %out
@@ -164,7 +164,7 @@ define amdgpu_kernel void @fmf_sin_cancel_f32(ptr addrspace(1) %out, float %x) #
 ; GCN: v_sin_f32
 ; GCN: v_sin_f32
 ; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @sin_v4f32(ptr addrspace(1) %out, <4 x float> %vx) #1 {
+define amdgpu_kernel void @sin_v4f32(ptr addrspace(1) %out, <4 x float> %vx) {
    %sin = call <4 x float> @llvm.sin.v4f32( <4 x float> %vx)
    store <4 x float> %sin, ptr addrspace(1) %out
    ret void
@@ -174,5 +174,3 @@ declare float @llvm.sin.f32(float) #0
 declare <4 x float> @llvm.sin.v4f32(<4 x float>) #0
 
 attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind "unsafe-fp-math"="false" }
-attributes #2 = { nounwind "unsafe-fp-math"="true" }

>From 514d8453386f3c7768021fb3e4b0dc56e54d4feb Mon Sep 17 00:00:00 2001
From: PaperChalice <liujunchang97 at outlook.com>
Date: Mon, 30 Jun 2025 12:52:46 +0800
Subject: [PATCH 4/8] Remove `UnsafeFPMath` in `combineRepeatedFPDivisors`

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  5 +-
 llvm/test/CodeGen/AArch64/fdiv-combine.ll     | 79 +++++++++----------
 llvm/test/CodeGen/NVPTX/fast-math.ll          | 28 +++----
 3 files changed, 55 insertions(+), 57 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 5d69f55e0d1df..fce53d3be7090 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -18209,9 +18209,8 @@ SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
   // TODO: Limit this transform based on optsize/minsize - it always creates at
   //       least 1 extra instruction. But the perf win may be substantial enough
   //       that only minsize should restrict this.
-  bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
   const SDNodeFlags Flags = N->getFlags();
-  if (LegalDAG || (!UnsafeMath && !Flags.hasAllowReciprocal()))
+  if (LegalDAG || !Flags.hasAllowReciprocal())
     return SDValue();
 
   // Skip if current node is a reciprocal/fneg-reciprocal.
@@ -18248,7 +18247,7 @@ SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
 
       // This division is eligible for optimization only if global unsafe math
       // is enabled or if this division allows reciprocal formation.
-      if (UnsafeMath || U->getFlags().hasAllowReciprocal())
+      if (U->getFlags().hasAllowReciprocal())
         Users.insert(U);
     }
   }
diff --git a/llvm/test/CodeGen/AArch64/fdiv-combine.ll b/llvm/test/CodeGen/AArch64/fdiv-combine.ll
index d8f7f0a306684..91bb8ac714908 100644
--- a/llvm/test/CodeGen/AArch64/fdiv-combine.ll
+++ b/llvm/test/CodeGen/AArch64/fdiv-combine.ll
@@ -11,7 +11,7 @@
 ;   a / D; b / D; c / D;
 ;                =>
 ;   recip = 1.0 / D; a * recip; b * recip; c * recip;
-define void @three_fdiv_float(float %D, float %a, float %b, float %c) #0 {
+define void @three_fdiv_float(float %D, float %a, float %b, float %c) {
 ; CHECK-SD-LABEL: three_fdiv_float:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fmov s4, #1.00000000
@@ -28,14 +28,14 @@ define void @three_fdiv_float(float %D, float %a, float %b, float %c) #0 {
 ; CHECK-GI-NEXT:    fdiv s2, s3, s0
 ; CHECK-GI-NEXT:    fmov s0, s4
 ; CHECK-GI-NEXT:    b foo_3f
-  %div = fdiv float %a, %D
-  %div1 = fdiv float %b, %D
-  %div2 = fdiv float %c, %D
+  %div = fdiv arcp float %a, %D
+  %div1 = fdiv arcp float %b, %D
+  %div2 = fdiv arcp float %c, %D
   tail call void @foo_3f(float %div, float %div1, float %div2)
   ret void
 }
 
-define void @three_fdiv_double(double %D, double %a, double %b, double %c) #0 {
+define void @three_fdiv_double(double %D, double %a, double %b, double %c) {
 ; CHECK-SD-LABEL: three_fdiv_double:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fmov d4, #1.00000000
@@ -52,14 +52,14 @@ define void @three_fdiv_double(double %D, double %a, double %b, double %c) #0 {
 ; CHECK-GI-NEXT:    fdiv d2, d3, d0
 ; CHECK-GI-NEXT:    fmov d0, d4
 ; CHECK-GI-NEXT:    b foo_3d
-  %div = fdiv double %a, %D
-  %div1 = fdiv double %b, %D
-  %div2 = fdiv double %c, %D
+  %div = fdiv arcp double %a, %D
+  %div1 = fdiv arcp double %b, %D
+  %div2 = fdiv arcp double %c, %D
   tail call void @foo_3d(double %div, double %div1, double %div2)
   ret void
 }
 
-define void @three_fdiv_4xfloat(<4 x float> %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) #0 {
+define void @three_fdiv_4xfloat(<4 x float> %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) {
 ; CHECK-SD-LABEL: three_fdiv_4xfloat:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fmov v4.4s, #1.00000000
@@ -76,14 +76,14 @@ define void @three_fdiv_4xfloat(<4 x float> %D, <4 x float> %a, <4 x float> %b,
 ; CHECK-GI-NEXT:    fdiv v2.4s, v3.4s, v0.4s
 ; CHECK-GI-NEXT:    mov v0.16b, v4.16b
 ; CHECK-GI-NEXT:    b foo_3_4xf
-  %div = fdiv <4 x float> %a, %D
-  %div1 = fdiv <4 x float> %b, %D
-  %div2 = fdiv <4 x float> %c, %D
+  %div = fdiv arcp <4 x float> %a, %D
+  %div1 = fdiv arcp <4 x float> %b, %D
+  %div2 = fdiv arcp <4 x float> %c, %D
   tail call void @foo_3_4xf(<4 x float> %div, <4 x float> %div1, <4 x float> %div2)
   ret void
 }
 
-define void @three_fdiv_2xdouble(<2 x double> %D, <2 x double> %a, <2 x double> %b, <2 x double> %c) #0 {
+define void @three_fdiv_2xdouble(<2 x double> %D, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
 ; CHECK-SD-LABEL: three_fdiv_2xdouble:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fmov v4.2d, #1.00000000
@@ -100,42 +100,42 @@ define void @three_fdiv_2xdouble(<2 x double> %D, <2 x double> %a, <2 x double>
 ; CHECK-GI-NEXT:    fdiv v2.2d, v3.2d, v0.2d
 ; CHECK-GI-NEXT:    mov v0.16b, v4.16b
 ; CHECK-GI-NEXT:    b foo_3_2xd
-  %div = fdiv <2 x double> %a, %D
-  %div1 = fdiv <2 x double> %b, %D
-  %div2 = fdiv <2 x double> %c, %D
+  %div = fdiv arcp <2 x double> %a, %D
+  %div1 = fdiv arcp <2 x double> %b, %D
+  %div2 = fdiv arcp <2 x double> %c, %D
   tail call void @foo_3_2xd(<2 x double> %div, <2 x double> %div1, <2 x double> %div2)
   ret void
 }
 
 ; Following test cases check we never combine two FDIVs if neither of them
 ; calculates a reciprocal.
-define void @two_fdiv_float(float %D, float %a, float %b) #0 {
+define void @two_fdiv_float(float %D, float %a, float %b) {
 ; CHECK-LABEL: two_fdiv_float:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fdiv s3, s1, s0
 ; CHECK-NEXT:    fdiv s1, s2, s0
 ; CHECK-NEXT:    fmov s0, s3
 ; CHECK-NEXT:    b foo_2f
-  %div = fdiv float %a, %D
-  %div1 = fdiv float %b, %D
+  %div = fdiv arcp float %a, %D
+  %div1 = fdiv arcp float %b, %D
   tail call void @foo_2f(float %div, float %div1)
   ret void
 }
 
-define void @two_fdiv_double(double %D, double %a, double %b) #0 {
+define void @two_fdiv_double(double %D, double %a, double %b) {
 ; CHECK-LABEL: two_fdiv_double:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fdiv d3, d1, d0
 ; CHECK-NEXT:    fdiv d1, d2, d0
 ; CHECK-NEXT:    fmov d0, d3
 ; CHECK-NEXT:    b foo_2d
-  %div = fdiv double %a, %D
-  %div1 = fdiv double %b, %D
+  %div = fdiv arcp double %a, %D
+  %div1 = fdiv arcp double %b, %D
   tail call void @foo_2d(double %div, double %div1)
   ret void
 }
 
-define void @splat_three_fdiv_4xfloat(float %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) #0 {
+define void @splat_three_fdiv_4xfloat(float %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) {
 ; CHECK-SD-LABEL: splat_three_fdiv_4xfloat:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
@@ -157,14 +157,14 @@ define void @splat_three_fdiv_4xfloat(float %D, <4 x float> %a, <4 x float> %b,
 ; CHECK-GI-NEXT:    b foo_3_4xf
   %D.ins = insertelement <4 x float> poison, float %D, i64 0
   %splat = shufflevector <4 x float> %D.ins, <4 x float> poison, <4 x i32> zeroinitializer
-  %div = fdiv <4 x float> %a, %splat
-  %div1 = fdiv <4 x float> %b, %splat
-  %div2 = fdiv <4 x float> %c, %splat
+  %div = fdiv arcp <4 x float> %a, %splat
+  %div1 = fdiv arcp <4 x float> %b, %splat
+  %div2 = fdiv arcp <4 x float> %c, %splat
   tail call void @foo_3_4xf(<4 x float> %div, <4 x float> %div1, <4 x float> %div2)
   ret void
 }
 
-define <4 x float> @splat_fdiv_v4f32(float %D, <4 x float> %a) #1 {
+define <4 x float> @splat_fdiv_v4f32(float %D, <4 x float> %a) #0 {
 ; CHECK-SD-LABEL: splat_fdiv_v4f32:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
@@ -183,11 +183,11 @@ define <4 x float> @splat_fdiv_v4f32(float %D, <4 x float> %a) #1 {
 entry:
   %D.ins = insertelement <4 x float> poison, float %D, i64 0
   %splat = shufflevector <4 x float> %D.ins, <4 x float> poison, <4 x i32> zeroinitializer
-  %div = fdiv <4 x float> %a, %splat
+  %div = fdiv arcp <4 x float> %a, %splat
   ret <4 x float> %div
 }
 
-define <vscale x 4 x float> @splat_fdiv_nxv4f32(float %D, <vscale x 4 x float> %a) #1 {
+define <vscale x 4 x float> @splat_fdiv_nxv4f32(float %D, <vscale x 4 x float> %a) #0 {
 ; CHECK-LABEL: splat_fdiv_nxv4f32:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    fmov s2, #1.00000000
@@ -198,11 +198,11 @@ define <vscale x 4 x float> @splat_fdiv_nxv4f32(float %D, <vscale x 4 x float> %
 entry:
   %D.ins = insertelement <vscale x 4 x float> poison, float %D, i64 0
   %splat = shufflevector <vscale x 4 x float> %D.ins, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
-  %div = fdiv <vscale x 4 x float> %a, %splat
+  %div = fdiv arcp <vscale x 4 x float> %a, %splat
   ret <vscale x 4 x float> %div
 }
 
-define void @splat_three_fdiv_nxv4f32(float %D, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) #1 {
+define void @splat_three_fdiv_nxv4f32(float %D, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) #0 {
 ; CHECK-LABEL: splat_three_fdiv_nxv4f32:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    fmov s4, #1.00000000
@@ -215,14 +215,14 @@ define void @splat_three_fdiv_nxv4f32(float %D, <vscale x 4 x float> %a, <vscale
 entry:
   %D.ins = insertelement <vscale x 4 x float> poison, float %D, i64 0
   %splat = shufflevector <vscale x 4 x float> %D.ins, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
-  %div = fdiv <vscale x 4 x float> %a, %splat
-  %div1 = fdiv <vscale x 4 x float> %b, %splat
-  %div2 = fdiv <vscale x 4 x float> %c, %splat
+  %div = fdiv arcp <vscale x 4 x float> %a, %splat
+  %div1 = fdiv arcp <vscale x 4 x float> %b, %splat
+  %div2 = fdiv arcp <vscale x 4 x float> %c, %splat
   tail call void @foo_3_nxv4f32(<vscale x 4 x float> %div, <vscale x 4 x float> %div1, <vscale x 4 x float> %div2)
   ret void
 }
 
-define <vscale x 2 x double> @splat_fdiv_nxv2f64(double %D, <vscale x 2 x double> %a) #1 {
+define <vscale x 2 x double> @splat_fdiv_nxv2f64(double %D, <vscale x 2 x double> %a) #0 {
 ; CHECK-LABEL: splat_fdiv_nxv2f64:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
@@ -237,7 +237,7 @@ entry:
   ret <vscale x 2 x double> %div
 }
 
-define void @splat_two_fdiv_nxv2f64(double %D, <vscale x 2 x double> %a, <vscale x 2 x double> %b) #1 {
+define void @splat_two_fdiv_nxv2f64(double %D, <vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
 ; CHECK-LABEL: splat_two_fdiv_nxv2f64:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    fmov d3, #1.00000000
@@ -249,8 +249,8 @@ define void @splat_two_fdiv_nxv2f64(double %D, <vscale x 2 x double> %a, <vscale
 entry:
   %D.ins = insertelement <vscale x 2 x double> poison, double %D, i64 0
   %splat = shufflevector <vscale x 2 x double> %D.ins, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
-  %div = fdiv <vscale x 2 x double> %a, %splat
-  %div1 = fdiv <vscale x 2 x double> %b, %splat
+  %div = fdiv arcp <vscale x 2 x double> %a, %splat
+  %div1 = fdiv arcp <vscale x 2 x double> %b, %splat
   tail call void @foo_2_nxv2f64(<vscale x 2 x double> %div, <vscale x 2 x double> %div1)
   ret void
 }
@@ -264,5 +264,4 @@ declare void @foo_2d(double, double)
 declare void @foo_3_nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
 declare void @foo_2_nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
 
-attributes #0 = { "unsafe-fp-math"="true" }
-attributes #1 = { "unsafe-fp-math"="true" "target-features"="+sve" }
+attributes #0 = { "target-features"="+sve" }
diff --git a/llvm/test/CodeGen/NVPTX/fast-math.ll b/llvm/test/CodeGen/NVPTX/fast-math.ll
index bc48d242f88fd..a3bcd708d48ff 100644
--- a/llvm/test/CodeGen/NVPTX/fast-math.ll
+++ b/llvm/test/CodeGen/NVPTX/fast-math.ll
@@ -395,7 +395,7 @@ define float @repeated_div_recip_allowed_ftz_sel(i1 %pred, float %a, float %b, f
   ret float %w
 }
 
-define float @repeated_div_fast(i1 %pred, float %a, float %b, float %divisor) #0 {
+define float @repeated_div_fast(i1 %pred, float %a, float %b, float %divisor) {
 ; CHECK-LABEL: repeated_div_fast(
 ; CHECK:       {
 ; CHECK-NEXT:    .reg .pred %p<2>;
@@ -416,14 +416,14 @@ define float @repeated_div_fast(i1 %pred, float %a, float %b, float %divisor) #0
 ; CHECK-NEXT:    selp.f32 %r8, %r7, %r6, %p1;
 ; CHECK-NEXT:    st.param.b32 [func_retval0], %r8;
 ; CHECK-NEXT:    ret;
-  %x = fdiv float %a, %divisor
-  %y = fdiv float %b, %divisor
-  %z = fmul float %x, %y
+  %x = fdiv arcp float %a, %divisor
+  %y = fdiv contract arcp afn float %b, %divisor
+  %z = fmul contract float %x, %y
   %w = select i1 %pred, float %z, float %y
   ret float %w
 }
 
-define float @repeated_div_fast_sel(i1 %pred, float %a, float %b, float %divisor) #0 {
+define float @repeated_div_fast_sel(i1 %pred, float %a, float %b, float %divisor) {
 ; CHECK-LABEL: repeated_div_fast_sel(
 ; CHECK:       {
 ; CHECK-NEXT:    .reg .pred %p<2>;
@@ -441,13 +441,13 @@ define float @repeated_div_fast_sel(i1 %pred, float %a, float %b, float %divisor
 ; CHECK-NEXT:    div.approx.f32 %r5, %r3, %r4;
 ; CHECK-NEXT:    st.param.b32 [func_retval0], %r5;
 ; CHECK-NEXT:    ret;
-  %x = fdiv float %a, %divisor
-  %y = fdiv float %b, %divisor
+  %x = fdiv afn float %a, %divisor
+  %y = fdiv afn float %b, %divisor
   %w = select i1 %pred, float %x, float %y
   ret float %w
 }
 
-define float @repeated_div_fast_ftz(i1 %pred, float %a, float %b, float %divisor) #0 #1 {
+define float @repeated_div_fast_ftz(i1 %pred, float %a, float %b, float %divisor) #1 {
 ; CHECK-LABEL: repeated_div_fast_ftz(
 ; CHECK:       {
 ; CHECK-NEXT:    .reg .pred %p<2>;
@@ -468,14 +468,14 @@ define float @repeated_div_fast_ftz(i1 %pred, float %a, float %b, float %divisor
 ; CHECK-NEXT:    selp.f32 %r8, %r7, %r6, %p1;
 ; CHECK-NEXT:    st.param.b32 [func_retval0], %r8;
 ; CHECK-NEXT:    ret;
-  %x = fdiv float %a, %divisor
-  %y = fdiv float %b, %divisor
-  %z = fmul float %x, %y
+  %x = fdiv arcp float %a, %divisor
+  %y = fdiv contract arcp afn float %b, %divisor
+  %z = fmul contract float %x, %y
   %w = select i1 %pred, float %z, float %y
   ret float %w
 }
 
-define float @repeated_div_fast_ftz_sel(i1 %pred, float %a, float %b, float %divisor) #0 #1 {
+define float @repeated_div_fast_ftz_sel(i1 %pred, float %a, float %b, float %divisor) #1 {
 ; CHECK-LABEL: repeated_div_fast_ftz_sel(
 ; CHECK:       {
 ; CHECK-NEXT:    .reg .pred %p<2>;
@@ -493,8 +493,8 @@ define float @repeated_div_fast_ftz_sel(i1 %pred, float %a, float %b, float %div
 ; CHECK-NEXT:    div.approx.ftz.f32 %r5, %r3, %r4;
 ; CHECK-NEXT:    st.param.b32 [func_retval0], %r5;
 ; CHECK-NEXT:    ret;
-  %x = fdiv float %a, %divisor
-  %y = fdiv float %b, %divisor
+  %x = fdiv afn float %a, %divisor
+  %y = fdiv afn float %b, %divisor
   %w = select i1 %pred, float %x, float %y
   ret float %w
 }

>From 2de50fe0e9f0c19f8862002bb95412b3dcd8920a Mon Sep 17 00:00:00 2001
From: PaperChalice <liujunchang97 at outlook.com>
Date: Mon, 30 Jun 2025 13:45:31 +0800
Subject: [PATCH 5/8] Propagate fast math flags to fptrunc

---
 llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 04d6fd5f48cc3..b32720ba2a853 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3908,11 +3908,15 @@ void SelectionDAGBuilder::visitFPTrunc(const User &I) {
   // FPTrunc is never a no-op cast, no need to check
   SDValue N = getValue(I.getOperand(0));
   SDLoc dl = getCurSDLoc();
+  SDNodeFlags Flags;
+  if (auto *FPOp = dyn_cast<FPMathOperator>(&I))
+    Flags.copyFMF(*FPOp);
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
   setValue(&I, DAG.getNode(ISD::FP_ROUND, dl, DestVT, N,
                            DAG.getTargetConstant(
-                               0, dl, TLI.getPointerTy(DAG.getDataLayout()))));
+                               0, dl, TLI.getPointerTy(DAG.getDataLayout())),
+                           Flags));
 }
 
 void SelectionDAGBuilder::visitFPExt(const User &I) {

>From 8b13f3d7aa111a4699252f6b6d491448ecc1dd54 Mon Sep 17 00:00:00 2001
From: PaperChalice <liujunchang97 at outlook.com>
Date: Mon, 30 Jun 2025 13:45:53 +0800
Subject: [PATCH 6/8] Remove `UnsafeFPMath` in `visitFP_ROUND`

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  2 +-
 llvm/test/CodeGen/X86/fp-double-rounding.ll   | 15 ++++++++++++---
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index fce53d3be7090..30e758c8edfea 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -18860,7 +18860,7 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
     // single-step fp_round we want to fold to.
     // In other words, double rounding isn't the same as rounding.
     // Also, this is a value preserving truncation iff both fp_round's are.
-    if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc)
+    if (N->getFlags().hasAllowContract() || N0IsTrunc)
       return DAG.getNode(
           ISD::FP_ROUND, DL, VT, N0.getOperand(0),
           DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL, /*isTarget=*/true));
diff --git a/llvm/test/CodeGen/X86/fp-double-rounding.ll b/llvm/test/CodeGen/X86/fp-double-rounding.ll
index 543908a10df29..957c0280f6653 100644
--- a/llvm/test/CodeGen/X86/fp-double-rounding.ll
+++ b/llvm/test/CodeGen/X86/fp-double-rounding.ll
@@ -4,16 +4,25 @@
 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64--"
 
-; CHECK-LABEL: double_rounding:
+; CHECK-LABEL: double_rounding_safe:
 ; SAFE: callq __trunctfdf2
 ; SAFE-NEXT: cvtsd2ss %xmm0
+define void @double_rounding_safe(ptr %x, ptr %f) {
+entry:
+  %0 = load fp128, ptr %x, align 16
+  %1 = fptrunc fp128 %0 to double
+  %2 = fptrunc double %1 to float
+  store float %2, ptr %f, align 4
+  ret void
+}
+; CHECK-LABEL: double_rounding:
 ; UNSAFE: callq __trunctfsf2
 ; UNSAFE-NOT: cvt
 define void @double_rounding(ptr %x, ptr %f) {
 entry:
   %0 = load fp128, ptr %x, align 16
-  %1 = fptrunc fp128 %0 to double
-  %2 = fptrunc double %1 to float
+  %1 = fptrunc contract fp128 %0 to double
+  %2 = fptrunc contract double %1 to float
   store float %2, ptr %f, align 4
   ret void
 }

>From 12b26523b091dc9d6aba002bb471d5ceefb06483 Mon Sep 17 00:00:00 2001
From: PaperChalice <liujunchang97 at outlook.com>
Date: Mon, 30 Jun 2025 13:46:45 +0800
Subject: [PATCH 7/8] Remove an unused variable in `visitFMUL`

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 30e758c8edfea..2b808109c918e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -17911,7 +17911,6 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
   ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
   EVT VT = N->getValueType(0);
   SDLoc DL(N);
-  const TargetOptions &Options = DAG.getTarget().Options;
   const SDNodeFlags Flags = N->getFlags();
   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
 

>From 6d150a3a64193d70d6f25901b750700638df82ed Mon Sep 17 00:00:00 2001
From: PaperChalice <liujunchang97 at outlook.com>
Date: Mon, 30 Jun 2025 13:47:44 +0800
Subject: [PATCH 8/8] Remove `UnsafeFPMath` in `visitFADDForFMACombine` and `visitFMA`

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  9 +++------
 llvm/test/CodeGen/NVPTX/fma-assoc.ll          | 16 ++++++++--------
 2 files changed, 11 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 2b808109c918e..02551c59406a5 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -16914,8 +16914,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
   // fadd (G, (fma A, B, (fma (C, D, (fmul (E, F)))))) -->
   // fma A, B, (fma C, D, fma (E, F, G)).
   // This requires reassociation because it changes the order of operations.
-  bool CanReassociate =
-      Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
+  bool CanReassociate = N->getFlags().hasAllowReassociation();
   if (CanReassociate) {
     SDValue FMA, E;
     if (isFusedOp(N0) && N0.hasOneUse()) {
@@ -18087,8 +18086,7 @@ template <class MatchContextClass> SDValue DAGCombiner::visitFMA(SDNode *N) {
       return matcher.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2);
   }
 
-  // FIXME: use fast math flags instead of Options.UnsafeFPMath
-  // TODO: Finally migrate away from global TargetOptions.
+  // FIXME: Finally migrate away from global TargetOptions.
   if (Options.AllowFPOpFusion == FPOpFusion::Fast ||
       (Options.NoNaNsFPMath && Options.NoInfsFPMath) ||
       (N->getFlags().hasNoNaNs() && N->getFlags().hasNoInfs())) {
@@ -18112,8 +18110,7 @@ template <class MatchContextClass> SDValue DAGCombiner::visitFMA(SDNode *N) {
      !DAG.isConstantFPBuildVectorOrConstantFP(N1))
     return matcher.getNode(ISD::FMA, DL, VT, N1, N0, N2);
 
-  bool CanReassociate =
-      Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
+  bool CanReassociate = N->getFlags().hasAllowReassociation();
   if (CanReassociate) {
     // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
     if (matcher.match(N2, ISD::FMUL) && N0 == N2.getOperand(0) &&
diff --git a/llvm/test/CodeGen/NVPTX/fma-assoc.ll b/llvm/test/CodeGen/NVPTX/fma-assoc.ll
index 1034c3eed3dc0..6693c9044ca2c 100644
--- a/llvm/test/CodeGen/NVPTX/fma-assoc.ll
+++ b/llvm/test/CodeGen/NVPTX/fma-assoc.ll
@@ -20,10 +20,10 @@ define ptx_device float @t1_f32(float %x, float %y, float %z,
 ; CHECK-UNSAFE-NEXT:    st.param.b32 [func_retval0], %r7;
 ; CHECK-UNSAFE-NEXT:    ret;
                                 float %u, float %v) {
-  %a = fmul float %x, %y
-  %b = fmul float %u, %v
-  %c = fadd float %a, %b
-  %d = fadd float %c, %z
+  %a = fmul reassoc float %x, %y
+  %b = fmul reassoc float %u, %v
+  %c = fadd reassoc float %a, %b
+  %d = fadd reassoc float %c, %z
   ret float %d
 }
 
@@ -43,10 +43,10 @@ define ptx_device double @t1_f64(double %x, double %y, double %z,
 ; CHECK-UNSAFE-NEXT:    st.param.b64 [func_retval0], %rd7;
 ; CHECK-UNSAFE-NEXT:    ret;
                                  double %u, double %v) {
-  %a = fmul double %x, %y
-  %b = fmul double %u, %v
-  %c = fadd double %a, %b
-  %d = fadd double %c, %z
+  %a = fmul reassoc double %x, %y
+  %b = fmul reassoc double %u, %v
+  %c = fadd reassoc double %a, %b
+  %d = fadd reassoc double %c, %z
   ret double %d
 }