[llvm] Draft: [DAGCombiner] Remove all `UnsafeFPMath` references (PR #146295)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Jun 29 23:48:13 PDT 2025
https://github.com/paperchalice updated https://github.com/llvm/llvm-project/pull/146295
From 5dda9d969699ed03538cdc65a56b3e4942aa9a5e Mon Sep 17 00:00:00 2001
From: PaperChalice <liujunchang97 at outlook.com>
Date: Mon, 30 Jun 2025 09:14:24 +0800
Subject: [PATCH 1/8] remove `UnsafeFPMath` usages in `visitFADD`
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 8554db0a1220c..b8e1da4749540 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -17581,7 +17581,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
// If 'unsafe math' or reassoc and nsz, fold lots of things.
// TODO: break out portions of the transformations below for which Unsafe is
// considered and which do not require both nsz and reassoc
- if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
+ if ((Options.NoSignedZerosFPMath ||
(Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
AllowNewConst) {
// fadd (fadd x, c1), c2 -> fadd x, c1 + c2
@@ -17668,7 +17668,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
}
} // enable-unsafe-fp-math && AllowNewConst
- if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
+ if ((Options.NoSignedZerosFPMath ||
(Flags.hasAllowReassociation() && Flags.hasNoSignedZeros()))) {
// Fold fadd(vecreduce(x), vecreduce(y)) -> vecreduce(fadd(x, y))
if (SDValue SD = reassociateReduction(ISD::VECREDUCE_FADD, ISD::FADD, DL,
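For reference, a minimal IR sketch (hypothetical function, not taken from the patch) of the pattern this hunk now folds based on instruction-level flags alone: with reassoc and nsz on both fadds, (x + c1) + c2 is still combined into a single fadd without -enable-unsafe-fp-math.

  define float @fold_fadd_constants(float %x) {
    ; fadd (fadd x, c1), c2 -> fadd x, c1 + c2 once reassoc and nsz are present
    %a = fadd reassoc nsz float %x, 1.0
    %b = fadd reassoc nsz float %a, 2.0
    ret float %b
  }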
From 69ac7066ae492afb97c528669007189522f079ce Mon Sep 17 00:00:00 2001
From: PaperChalice <liujunchang97 at outlook.com>
Date: Mon, 30 Jun 2025 09:30:51 +0800
Subject: [PATCH 2/8] Remove `UnsafeFPMath` in `visitFSUB`
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index b8e1da4749540..ebc553a57440e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -17771,7 +17771,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
}
}
- if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
+ if ((Options.NoSignedZerosFPMath ||
(Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
N1.getOpcode() == ISD::FADD) {
// X - (X + Y) -> -Y
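The fsub case follows the same shape; a hedged sketch (hypothetical function, not from the test suite) of IR that keeps folding X - (X + Y) -> -Y under per-instruction flags:

  define float @fsub_of_fadd(float %x, float %y) {
    ; X - (X + Y) -> -Y requires reassoc and nsz on the nodes, not the global option
    %add = fadd reassoc nsz float %x, %y
    %sub = fsub reassoc nsz float %x, %add
    ret float %sub
  }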
From 4a9bbb48a49735c554b30f0207730fbe380329b1 Mon Sep 17 00:00:00 2001
From: PaperChalice <liujunchang97 at outlook.com>
Date: Mon, 30 Jun 2025 09:32:17 +0800
Subject: [PATCH 3/8] Remove `UnsafeFPMath` in `visitFMUL`
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 2 +-
llvm/test/CodeGen/AMDGPU/llvm.sin.ll | 36 +++++++++----------
2 files changed, 18 insertions(+), 20 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index ebc553a57440e..5d69f55e0d1df 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -17935,7 +17935,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
- if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
+ if (Flags.hasAllowReassociation()) {
// fmul (fmul X, C1), C2 -> fmul X, C1 * C2
if (DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
N0.getOpcode() == ISD::FMUL) {
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.sin.ll b/llvm/test/CodeGen/AMDGPU/llvm.sin.ll
index 576ed270183f6..58ebf2aafa5a8 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.sin.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.sin.ll
@@ -16,7 +16,7 @@
; GFX9-NOT: v_fract_f32
; GCN: v_sin_f32
; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @sin_f32(ptr addrspace(1) %out, float %x) #1 {
+define amdgpu_kernel void @sin_f32(ptr addrspace(1) %out, float %x) {
%sin = call float @llvm.sin.f32(float %x)
store float %sin, ptr addrspace(1) %out
ret void
@@ -29,7 +29,7 @@ define amdgpu_kernel void @sin_f32(ptr addrspace(1) %out, float %x) #1 {
; GFX9-NOT: v_fract_f32
; GCN: v_sin_f32
; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @safe_sin_3x_f32(ptr addrspace(1) %out, float %x) #1 {
+define amdgpu_kernel void @safe_sin_3x_f32(ptr addrspace(1) %out, float %x) {
%y = fmul float 3.0, %x
%sin = call float @llvm.sin.f32(float %y)
store float %sin, ptr addrspace(1) %out
@@ -44,9 +44,9 @@ define amdgpu_kernel void @safe_sin_3x_f32(ptr addrspace(1) %out, float %x) #1 {
; GFX9-NOT: v_fract_f32
; GCN: v_sin_f32
; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @unsafe_sin_3x_f32(ptr addrspace(1) %out, float %x) #2 {
- %y = fmul float 3.0, %x
- %sin = call float @llvm.sin.f32(float %y)
+define amdgpu_kernel void @unsafe_sin_3x_f32(ptr addrspace(1) %out, float %x) {
+ %y = fmul reassoc float 3.0, %x
+ %sin = call reassoc float @llvm.sin.f32(float %y)
store float %sin, ptr addrspace(1) %out
ret void
}
@@ -59,7 +59,7 @@ define amdgpu_kernel void @unsafe_sin_3x_f32(ptr addrspace(1) %out, float %x) #2
; GFX9-NOT: v_fract_f32
; GCN: v_sin_f32
; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @fmf_sin_3x_f32(ptr addrspace(1) %out, float %x) #1 {
+define amdgpu_kernel void @fmf_sin_3x_f32(ptr addrspace(1) %out, float %x) {
%y = fmul reassoc float 3.0, %x
%sin = call reassoc float @llvm.sin.f32(float %y)
store float %sin, ptr addrspace(1) %out
@@ -73,7 +73,7 @@ define amdgpu_kernel void @fmf_sin_3x_f32(ptr addrspace(1) %out, float %x) #1 {
; GFX9-NOT: v_fract_f32
; GCN: v_sin_f32
; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @safe_sin_2x_f32(ptr addrspace(1) %out, float %x) #1 {
+define amdgpu_kernel void @safe_sin_2x_f32(ptr addrspace(1) %out, float %x) {
%y = fmul float 2.0, %x
%sin = call float @llvm.sin.f32(float %y)
store float %sin, ptr addrspace(1) %out
@@ -88,9 +88,9 @@ define amdgpu_kernel void @safe_sin_2x_f32(ptr addrspace(1) %out, float %x) #1 {
; GFX9-NOT: v_fract_f32
; GCN: v_sin_f32
; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @unsafe_sin_2x_f32(ptr addrspace(1) %out, float %x) #2 {
- %y = fmul float 2.0, %x
- %sin = call float @llvm.sin.f32(float %y)
+define amdgpu_kernel void @unsafe_sin_2x_f32(ptr addrspace(1) %out, float %x) {
+ %y = fmul reassoc float 2.0, %x
+ %sin = call reassoc float @llvm.sin.f32(float %y)
store float %sin, ptr addrspace(1) %out
ret void
}
@@ -103,7 +103,7 @@ define amdgpu_kernel void @unsafe_sin_2x_f32(ptr addrspace(1) %out, float %x) #2
; GFX9-NOT: v_fract_f32
; GCN: v_sin_f32
; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @fmf_sin_2x_f32(ptr addrspace(1) %out, float %x) #1 {
+define amdgpu_kernel void @fmf_sin_2x_f32(ptr addrspace(1) %out, float %x) {
%y = fmul reassoc float 2.0, %x
%sin = call reassoc float @llvm.sin.f32(float %y)
store float %sin, ptr addrspace(1) %out
@@ -117,7 +117,7 @@ define amdgpu_kernel void @fmf_sin_2x_f32(ptr addrspace(1) %out, float %x) #1 {
; GFX9-NOT: v_fract_f32
; GCN: v_sin_f32
; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @safe_sin_cancel_f32(ptr addrspace(1) %out, float %x) #1 {
+define amdgpu_kernel void @safe_sin_cancel_f32(ptr addrspace(1) %out, float %x) {
%y = fmul float 0x401921FB60000000, %x
%sin = call float @llvm.sin.f32(float %y)
store float %sin, ptr addrspace(1) %out
@@ -131,9 +131,9 @@ define amdgpu_kernel void @safe_sin_cancel_f32(ptr addrspace(1) %out, float %x)
; GFX9-NOT: v_fract_f32
; GCN: v_sin_f32
; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @unsafe_sin_cancel_f32(ptr addrspace(1) %out, float %x) #2 {
- %y = fmul float 0x401921FB60000000, %x
- %sin = call float @llvm.sin.f32(float %y)
+define amdgpu_kernel void @unsafe_sin_cancel_f32(ptr addrspace(1) %out, float %x) {
+ %y = fmul reassoc float 0x401921FB60000000, %x
+ %sin = call reassoc float @llvm.sin.f32(float %y)
store float %sin, ptr addrspace(1) %out
ret void
}
@@ -145,7 +145,7 @@ define amdgpu_kernel void @unsafe_sin_cancel_f32(ptr addrspace(1) %out, float %x
; GFX9-NOT: v_fract_f32
; GCN: v_sin_f32
; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @fmf_sin_cancel_f32(ptr addrspace(1) %out, float %x) #1 {
+define amdgpu_kernel void @fmf_sin_cancel_f32(ptr addrspace(1) %out, float %x) {
%y = fmul reassoc float 0x401921FB60000000, %x
%sin = call reassoc float @llvm.sin.f32(float %y)
store float %sin, ptr addrspace(1) %out
@@ -164,7 +164,7 @@ define amdgpu_kernel void @fmf_sin_cancel_f32(ptr addrspace(1) %out, float %x) #
; GCN: v_sin_f32
; GCN: v_sin_f32
; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @sin_v4f32(ptr addrspace(1) %out, <4 x float> %vx) #1 {
+define amdgpu_kernel void @sin_v4f32(ptr addrspace(1) %out, <4 x float> %vx) {
%sin = call <4 x float> @llvm.sin.v4f32( <4 x float> %vx)
store <4 x float> %sin, ptr addrspace(1) %out
ret void
@@ -174,5 +174,3 @@ declare float @llvm.sin.f32(float) #0
declare <4 x float> @llvm.sin.v4f32(<4 x float>) #0
attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind "unsafe-fp-math"="false" }
-attributes #2 = { nounwind "unsafe-fp-math"="true" }
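As the updated llvm.sin.ll tests show, the fmul reassociation now keys off the reassoc flag; a minimal standalone sketch (hypothetical function) of the constant-merging case handled by the hunk above:

  define float @fmul_const_reassoc(float %x) {
    ; fmul (fmul X, C1), C2 -> fmul X, C1 * C2 is gated on reassoc only
    %a = fmul reassoc float %x, 3.0
    %b = fmul reassoc float %a, 2.0
    ret float %b
  }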
From 514d8453386f3c7768021fb3e4b0dc56e54d4feb Mon Sep 17 00:00:00 2001
From: PaperChalice <liujunchang97 at outlook.com>
Date: Mon, 30 Jun 2025 12:52:46 +0800
Subject: [PATCH 4/8] Remove `UnsafeFPMath` in `combineRepeatedFPDivisors`
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 5 +-
llvm/test/CodeGen/AArch64/fdiv-combine.ll | 79 +++++++++----------
llvm/test/CodeGen/NVPTX/fast-math.ll | 28 +++----
3 files changed, 55 insertions(+), 57 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 5d69f55e0d1df..fce53d3be7090 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -18209,9 +18209,8 @@ SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
// TODO: Limit this transform based on optsize/minsize - it always creates at
// least 1 extra instruction. But the perf win may be substantial enough
// that only minsize should restrict this.
- bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
const SDNodeFlags Flags = N->getFlags();
- if (LegalDAG || (!UnsafeMath && !Flags.hasAllowReciprocal()))
+ if (LegalDAG || !Flags.hasAllowReciprocal())
return SDValue();
// Skip if current node is a reciprocal/fneg-reciprocal.
@@ -18248,7 +18247,7 @@ SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
// This division is eligible for optimization only if global unsafe math
// is enabled or if this division allows reciprocal formation.
- if (UnsafeMath || U->getFlags().hasAllowReciprocal())
+ if (U->getFlags().hasAllowReciprocal())
Users.insert(U);
}
}
diff --git a/llvm/test/CodeGen/AArch64/fdiv-combine.ll b/llvm/test/CodeGen/AArch64/fdiv-combine.ll
index d8f7f0a306684..91bb8ac714908 100644
--- a/llvm/test/CodeGen/AArch64/fdiv-combine.ll
+++ b/llvm/test/CodeGen/AArch64/fdiv-combine.ll
@@ -11,7 +11,7 @@
; a / D; b / D; c / D;
; =>
; recip = 1.0 / D; a * recip; b * recip; c * recip;
-define void @three_fdiv_float(float %D, float %a, float %b, float %c) #0 {
+define void @three_fdiv_float(float %D, float %a, float %b, float %c) {
; CHECK-SD-LABEL: three_fdiv_float:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: fmov s4, #1.00000000
@@ -28,14 +28,14 @@ define void @three_fdiv_float(float %D, float %a, float %b, float %c) #0 {
; CHECK-GI-NEXT: fdiv s2, s3, s0
; CHECK-GI-NEXT: fmov s0, s4
; CHECK-GI-NEXT: b foo_3f
- %div = fdiv float %a, %D
- %div1 = fdiv float %b, %D
- %div2 = fdiv float %c, %D
+ %div = fdiv arcp float %a, %D
+ %div1 = fdiv arcp float %b, %D
+ %div2 = fdiv arcp float %c, %D
tail call void @foo_3f(float %div, float %div1, float %div2)
ret void
}
-define void @three_fdiv_double(double %D, double %a, double %b, double %c) #0 {
+define void @three_fdiv_double(double %D, double %a, double %b, double %c) {
; CHECK-SD-LABEL: three_fdiv_double:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: fmov d4, #1.00000000
@@ -52,14 +52,14 @@ define void @three_fdiv_double(double %D, double %a, double %b, double %c) #0 {
; CHECK-GI-NEXT: fdiv d2, d3, d0
; CHECK-GI-NEXT: fmov d0, d4
; CHECK-GI-NEXT: b foo_3d
- %div = fdiv double %a, %D
- %div1 = fdiv double %b, %D
- %div2 = fdiv double %c, %D
+ %div = fdiv arcp double %a, %D
+ %div1 = fdiv arcp double %b, %D
+ %div2 = fdiv arcp double %c, %D
tail call void @foo_3d(double %div, double %div1, double %div2)
ret void
}
-define void @three_fdiv_4xfloat(<4 x float> %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) #0 {
+define void @three_fdiv_4xfloat(<4 x float> %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-SD-LABEL: three_fdiv_4xfloat:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: fmov v4.4s, #1.00000000
@@ -76,14 +76,14 @@ define void @three_fdiv_4xfloat(<4 x float> %D, <4 x float> %a, <4 x float> %b,
; CHECK-GI-NEXT: fdiv v2.4s, v3.4s, v0.4s
; CHECK-GI-NEXT: mov v0.16b, v4.16b
; CHECK-GI-NEXT: b foo_3_4xf
- %div = fdiv <4 x float> %a, %D
- %div1 = fdiv <4 x float> %b, %D
- %div2 = fdiv <4 x float> %c, %D
+ %div = fdiv arcp <4 x float> %a, %D
+ %div1 = fdiv arcp <4 x float> %b, %D
+ %div2 = fdiv arcp <4 x float> %c, %D
tail call void @foo_3_4xf(<4 x float> %div, <4 x float> %div1, <4 x float> %div2)
ret void
}
-define void @three_fdiv_2xdouble(<2 x double> %D, <2 x double> %a, <2 x double> %b, <2 x double> %c) #0 {
+define void @three_fdiv_2xdouble(<2 x double> %D, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-SD-LABEL: three_fdiv_2xdouble:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: fmov v4.2d, #1.00000000
@@ -100,42 +100,42 @@ define void @three_fdiv_2xdouble(<2 x double> %D, <2 x double> %a, <2 x double>
; CHECK-GI-NEXT: fdiv v2.2d, v3.2d, v0.2d
; CHECK-GI-NEXT: mov v0.16b, v4.16b
; CHECK-GI-NEXT: b foo_3_2xd
- %div = fdiv <2 x double> %a, %D
- %div1 = fdiv <2 x double> %b, %D
- %div2 = fdiv <2 x double> %c, %D
+ %div = fdiv arcp <2 x double> %a, %D
+ %div1 = fdiv arcp <2 x double> %b, %D
+ %div2 = fdiv arcp <2 x double> %c, %D
tail call void @foo_3_2xd(<2 x double> %div, <2 x double> %div1, <2 x double> %div2)
ret void
}
; Following test cases check we never combine two FDIVs if neither of them
; calculates a reciprocal.
-define void @two_fdiv_float(float %D, float %a, float %b) #0 {
+define void @two_fdiv_float(float %D, float %a, float %b) {
; CHECK-LABEL: two_fdiv_float:
; CHECK: // %bb.0:
; CHECK-NEXT: fdiv s3, s1, s0
; CHECK-NEXT: fdiv s1, s2, s0
; CHECK-NEXT: fmov s0, s3
; CHECK-NEXT: b foo_2f
- %div = fdiv float %a, %D
- %div1 = fdiv float %b, %D
+ %div = fdiv arcp float %a, %D
+ %div1 = fdiv arcp float %b, %D
tail call void @foo_2f(float %div, float %div1)
ret void
}
-define void @two_fdiv_double(double %D, double %a, double %b) #0 {
+define void @two_fdiv_double(double %D, double %a, double %b) {
; CHECK-LABEL: two_fdiv_double:
; CHECK: // %bb.0:
; CHECK-NEXT: fdiv d3, d1, d0
; CHECK-NEXT: fdiv d1, d2, d0
; CHECK-NEXT: fmov d0, d3
; CHECK-NEXT: b foo_2d
- %div = fdiv double %a, %D
- %div1 = fdiv double %b, %D
+ %div = fdiv arcp double %a, %D
+ %div1 = fdiv arcp double %b, %D
tail call void @foo_2d(double %div, double %div1)
ret void
}
-define void @splat_three_fdiv_4xfloat(float %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) #0 {
+define void @splat_three_fdiv_4xfloat(float %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-SD-LABEL: splat_three_fdiv_4xfloat:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
@@ -157,14 +157,14 @@ define void @splat_three_fdiv_4xfloat(float %D, <4 x float> %a, <4 x float> %b,
; CHECK-GI-NEXT: b foo_3_4xf
%D.ins = insertelement <4 x float> poison, float %D, i64 0
%splat = shufflevector <4 x float> %D.ins, <4 x float> poison, <4 x i32> zeroinitializer
- %div = fdiv <4 x float> %a, %splat
- %div1 = fdiv <4 x float> %b, %splat
- %div2 = fdiv <4 x float> %c, %splat
+ %div = fdiv arcp <4 x float> %a, %splat
+ %div1 = fdiv arcp <4 x float> %b, %splat
+ %div2 = fdiv arcp <4 x float> %c, %splat
tail call void @foo_3_4xf(<4 x float> %div, <4 x float> %div1, <4 x float> %div2)
ret void
}
-define <4 x float> @splat_fdiv_v4f32(float %D, <4 x float> %a) #1 {
+define <4 x float> @splat_fdiv_v4f32(float %D, <4 x float> %a) #0 {
; CHECK-SD-LABEL: splat_fdiv_v4f32:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
@@ -183,11 +183,11 @@ define <4 x float> @splat_fdiv_v4f32(float %D, <4 x float> %a) #1 {
entry:
%D.ins = insertelement <4 x float> poison, float %D, i64 0
%splat = shufflevector <4 x float> %D.ins, <4 x float> poison, <4 x i32> zeroinitializer
- %div = fdiv <4 x float> %a, %splat
+ %div = fdiv arcp <4 x float> %a, %splat
ret <4 x float> %div
}
-define <vscale x 4 x float> @splat_fdiv_nxv4f32(float %D, <vscale x 4 x float> %a) #1 {
+define <vscale x 4 x float> @splat_fdiv_nxv4f32(float %D, <vscale x 4 x float> %a) #0 {
; CHECK-LABEL: splat_fdiv_nxv4f32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmov s2, #1.00000000
@@ -198,11 +198,11 @@ define <vscale x 4 x float> @splat_fdiv_nxv4f32(float %D, <vscale x 4 x float> %
entry:
%D.ins = insertelement <vscale x 4 x float> poison, float %D, i64 0
%splat = shufflevector <vscale x 4 x float> %D.ins, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
- %div = fdiv <vscale x 4 x float> %a, %splat
+ %div = fdiv arcp <vscale x 4 x float> %a, %splat
ret <vscale x 4 x float> %div
}
-define void @splat_three_fdiv_nxv4f32(float %D, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) #1 {
+define void @splat_three_fdiv_nxv4f32(float %D, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) #0 {
; CHECK-LABEL: splat_three_fdiv_nxv4f32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmov s4, #1.00000000
@@ -215,14 +215,14 @@ define void @splat_three_fdiv_nxv4f32(float %D, <vscale x 4 x float> %a, <vscale
entry:
%D.ins = insertelement <vscale x 4 x float> poison, float %D, i64 0
%splat = shufflevector <vscale x 4 x float> %D.ins, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
- %div = fdiv <vscale x 4 x float> %a, %splat
- %div1 = fdiv <vscale x 4 x float> %b, %splat
- %div2 = fdiv <vscale x 4 x float> %c, %splat
+ %div = fdiv arcp <vscale x 4 x float> %a, %splat
+ %div1 = fdiv arcp <vscale x 4 x float> %b, %splat
+ %div2 = fdiv arcp <vscale x 4 x float> %c, %splat
tail call void @foo_3_nxv4f32(<vscale x 4 x float> %div, <vscale x 4 x float> %div1, <vscale x 4 x float> %div2)
ret void
}
-define <vscale x 2 x double> @splat_fdiv_nxv2f64(double %D, <vscale x 2 x double> %a) #1 {
+define <vscale x 2 x double> @splat_fdiv_nxv2f64(double %D, <vscale x 2 x double> %a) #0 {
; CHECK-LABEL: splat_fdiv_nxv2f64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
@@ -237,7 +237,7 @@ entry:
ret <vscale x 2 x double> %div
}
-define void @splat_two_fdiv_nxv2f64(double %D, <vscale x 2 x double> %a, <vscale x 2 x double> %b) #1 {
+define void @splat_two_fdiv_nxv2f64(double %D, <vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
; CHECK-LABEL: splat_two_fdiv_nxv2f64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmov d3, #1.00000000
@@ -249,8 +249,8 @@ define void @splat_two_fdiv_nxv2f64(double %D, <vscale x 2 x double> %a, <vscale
entry:
%D.ins = insertelement <vscale x 2 x double> poison, double %D, i64 0
%splat = shufflevector <vscale x 2 x double> %D.ins, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
- %div = fdiv <vscale x 2 x double> %a, %splat
- %div1 = fdiv <vscale x 2 x double> %b, %splat
+ %div = fdiv arcp <vscale x 2 x double> %a, %splat
+ %div1 = fdiv arcp <vscale x 2 x double> %b, %splat
tail call void @foo_2_nxv2f64(<vscale x 2 x double> %div, <vscale x 2 x double> %div1)
ret void
}
@@ -264,5 +264,4 @@ declare void @foo_2d(double, double)
declare void @foo_3_nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
declare void @foo_2_nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
-attributes #0 = { "unsafe-fp-math"="true" }
-attributes #1 = { "unsafe-fp-math"="true" "target-features"="+sve" }
+attributes #0 = { "target-features"="+sve" }
diff --git a/llvm/test/CodeGen/NVPTX/fast-math.ll b/llvm/test/CodeGen/NVPTX/fast-math.ll
index bc48d242f88fd..a3bcd708d48ff 100644
--- a/llvm/test/CodeGen/NVPTX/fast-math.ll
+++ b/llvm/test/CodeGen/NVPTX/fast-math.ll
@@ -395,7 +395,7 @@ define float @repeated_div_recip_allowed_ftz_sel(i1 %pred, float %a, float %b, f
ret float %w
}
-define float @repeated_div_fast(i1 %pred, float %a, float %b, float %divisor) #0 {
+define float @repeated_div_fast(i1 %pred, float %a, float %b, float %divisor) {
; CHECK-LABEL: repeated_div_fast(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
@@ -416,14 +416,14 @@ define float @repeated_div_fast(i1 %pred, float %a, float %b, float %divisor) #0
; CHECK-NEXT: selp.f32 %r8, %r7, %r6, %p1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r8;
; CHECK-NEXT: ret;
- %x = fdiv float %a, %divisor
- %y = fdiv float %b, %divisor
- %z = fmul float %x, %y
+ %x = fdiv arcp float %a, %divisor
+ %y = fdiv contract arcp afn float %b, %divisor
+ %z = fmul contract float %x, %y
%w = select i1 %pred, float %z, float %y
ret float %w
}
-define float @repeated_div_fast_sel(i1 %pred, float %a, float %b, float %divisor) #0 {
+define float @repeated_div_fast_sel(i1 %pred, float %a, float %b, float %divisor) {
; CHECK-LABEL: repeated_div_fast_sel(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
@@ -441,13 +441,13 @@ define float @repeated_div_fast_sel(i1 %pred, float %a, float %b, float %divisor
; CHECK-NEXT: div.approx.f32 %r5, %r3, %r4;
; CHECK-NEXT: st.param.b32 [func_retval0], %r5;
; CHECK-NEXT: ret;
- %x = fdiv float %a, %divisor
- %y = fdiv float %b, %divisor
+ %x = fdiv afn float %a, %divisor
+ %y = fdiv afn float %b, %divisor
%w = select i1 %pred, float %x, float %y
ret float %w
}
-define float @repeated_div_fast_ftz(i1 %pred, float %a, float %b, float %divisor) #0 #1 {
+define float @repeated_div_fast_ftz(i1 %pred, float %a, float %b, float %divisor) #1 {
; CHECK-LABEL: repeated_div_fast_ftz(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
@@ -468,14 +468,14 @@ define float @repeated_div_fast_ftz(i1 %pred, float %a, float %b, float %divisor
; CHECK-NEXT: selp.f32 %r8, %r7, %r6, %p1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r8;
; CHECK-NEXT: ret;
- %x = fdiv float %a, %divisor
- %y = fdiv float %b, %divisor
- %z = fmul float %x, %y
+ %x = fdiv arcp float %a, %divisor
+ %y = fdiv contract arcp afn float %b, %divisor
+ %z = fmul contract float %x, %y
%w = select i1 %pred, float %z, float %y
ret float %w
}
-define float @repeated_div_fast_ftz_sel(i1 %pred, float %a, float %b, float %divisor) #0 #1 {
+define float @repeated_div_fast_ftz_sel(i1 %pred, float %a, float %b, float %divisor) #1 {
; CHECK-LABEL: repeated_div_fast_ftz_sel(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
@@ -493,8 +493,8 @@ define float @repeated_div_fast_ftz_sel(i1 %pred, float %a, float %b, float %div
; CHECK-NEXT: div.approx.ftz.f32 %r5, %r3, %r4;
; CHECK-NEXT: st.param.b32 [func_retval0], %r5;
; CHECK-NEXT: ret;
- %x = fdiv float %a, %divisor
- %y = fdiv float %b, %divisor
+ %x = fdiv afn float %a, %divisor
+ %y = fdiv afn float %b, %divisor
%w = select i1 %pred, float %x, float %y
ret float %w
}
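The updated AArch64 and NVPTX tests mark every division with arcp instead of relying on the function-level "unsafe-fp-math" attribute. A hedged sketch (hypothetical names, not from the test suite) of IR that remains eligible for the repeated-divisor rewrite:

  declare void @use3f(float, float, float)

  define void @repeated_fdiv(float %D, float %a, float %b, float %c) {
    ; with arcp on every fdiv (and enough repeats for the target's threshold),
    ; the combiner may emit a single 1.0 / %D and turn the divisions into multiplies
    %q0 = fdiv arcp float %a, %D
    %q1 = fdiv arcp float %b, %D
    %q2 = fdiv arcp float %c, %D
    call void @use3f(float %q0, float %q1, float %q2)
    ret void
  }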
From 2de50fe0e9f0c19f8862002bb95412b3dcd8920a Mon Sep 17 00:00:00 2001
From: PaperChalice <liujunchang97 at outlook.com>
Date: Mon, 30 Jun 2025 13:45:31 +0800
Subject: [PATCH 5/8] propagate fast math flags to fptrunc
---
llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 04d6fd5f48cc3..b32720ba2a853 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3908,11 +3908,15 @@ void SelectionDAGBuilder::visitFPTrunc(const User &I) {
// FPTrunc is never a no-op cast, no need to check
SDValue N = getValue(I.getOperand(0));
SDLoc dl = getCurSDLoc();
+ SDNodeFlags Flags;
+ if (auto *FPOp = dyn_cast<FPMathOperator>(&I))
+ Flags.copyFMF(*FPOp);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
setValue(&I, DAG.getNode(ISD::FP_ROUND, dl, DestVT, N,
DAG.getTargetConstant(
- 0, dl, TLI.getPointerTy(DAG.getDataLayout()))));
+ 0, dl, TLI.getPointerTy(DAG.getDataLayout())),
+ Flags));
}
void SelectionDAGBuilder::visitFPExt(const User &I) {
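With this change, fast-math flags written on an IR fptrunc are carried onto the FP_ROUND node, so later combines (such as the visitFP_ROUND change in the next patch) can see them. A minimal sketch (hypothetical function) of the flag being propagated:

  define float @trunc_with_flags(double %x) {
    ; the contract flag here is now copied onto the generated FP_ROUND node
    %t = fptrunc contract double %x to float
    ret float %t
  }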
From 8b13f3d7aa111a4699252f6b6d491448ecc1dd54 Mon Sep 17 00:00:00 2001
From: PaperChalice <liujunchang97 at outlook.com>
Date: Mon, 30 Jun 2025 13:45:53 +0800
Subject: [PATCH 6/8] Remove `UnsafeFPMath` in `visitFP_ROUND`
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 2 +-
llvm/test/CodeGen/X86/fp-double-rounding.ll | 15 ++++++++++++---
2 files changed, 13 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index fce53d3be7090..30e758c8edfea 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -18860,7 +18860,7 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
// single-step fp_round we want to fold to.
// In other words, double rounding isn't the same as rounding.
// Also, this is a value preserving truncation iff both fp_round's are.
- if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc)
+ if (N->getFlags().hasAllowContract() || N0IsTrunc)
return DAG.getNode(
ISD::FP_ROUND, DL, VT, N0.getOperand(0),
DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL, /*isTarget=*/true));
diff --git a/llvm/test/CodeGen/X86/fp-double-rounding.ll b/llvm/test/CodeGen/X86/fp-double-rounding.ll
index 543908a10df29..957c0280f6653 100644
--- a/llvm/test/CodeGen/X86/fp-double-rounding.ll
+++ b/llvm/test/CodeGen/X86/fp-double-rounding.ll
@@ -4,16 +4,25 @@
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64--"
-; CHECK-LABEL: double_rounding:
+; CHECK-LABEL: double_rounding_safe:
; SAFE: callq __trunctfdf2
; SAFE-NEXT: cvtsd2ss %xmm0
+define void @double_rounding_safe(ptr %x, ptr %f) {
+entry:
+ %0 = load fp128, ptr %x, align 16
+ %1 = fptrunc fp128 %0 to double
+ %2 = fptrunc double %1 to float
+ store float %2, ptr %f, align 4
+ ret void
+}
+; CHECK-LABEL: double_rounding:
; UNSAFE: callq __trunctfsf2
; UNSAFE-NOT: cvt
define void @double_rounding(ptr %x, ptr %f) {
entry:
%0 = load fp128, ptr %x, align 16
- %1 = fptrunc fp128 %0 to double
- %2 = fptrunc double %1 to float
+ %1 = fptrunc contract fp128 %0 to double
+ %2 = fptrunc contract double %1 to float
store float %2, ptr %f, align 4
ret void
}
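In IR terms, the double-rounding fold now requires the contract flag on the truncation chain rather than the global option; a sketch mirroring the updated test (hypothetical function name):

  define float @double_trunc_contract(fp128 %x) {
    ; both truncations carry contract, so the two fp_rounds may be folded
    ; into a single fp128 -> float rounding step
    %d = fptrunc contract fp128 %x to double
    %f = fptrunc contract double %d to float
    ret float %f
  }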
From 12b26523b091dc9d6aba002bb471d5ceefb06483 Mon Sep 17 00:00:00 2001
From: PaperChalice <liujunchang97 at outlook.com>
Date: Mon, 30 Jun 2025 13:46:45 +0800
Subject: [PATCH 7/8] Remove an unused variable in `visitFMUL`
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 1 -
1 file changed, 1 deletion(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 30e758c8edfea..2b808109c918e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -17911,7 +17911,6 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
EVT VT = N->getValueType(0);
SDLoc DL(N);
- const TargetOptions &Options = DAG.getTarget().Options;
const SDNodeFlags Flags = N->getFlags();
SelectionDAG::FlagInserter FlagsInserter(DAG, N);
From c965666466fdb1f0467830ab4200c9d937588ebc Mon Sep 17 00:00:00 2001
From: PaperChalice <liujunchang97 at outlook.com>
Date: Mon, 30 Jun 2025 13:47:44 +0800
Subject: [PATCH 8/8] Remove `UnsafeFPMath` in `visitFADDForFMACombine`
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 9 +++------
llvm/test/CodeGen/ARM/fp-fast.ll | 11 +++++------
llvm/test/CodeGen/NVPTX/fma-assoc.ll | 16 ++++++++--------
llvm/test/CodeGen/X86/fma_patterns.ll | 12 ++++++------
llvm/test/CodeGen/X86/fma_patterns_wide.ll | 12 ++++++------
5 files changed, 28 insertions(+), 32 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 2b808109c918e..02551c59406a5 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -16914,8 +16914,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
// fadd (G, (fma A, B, (fma (C, D, (fmul (E, F)))))) -->
// fma A, B, (fma C, D, fma (E, F, G)).
// This requires reassociation because it changes the order of operations.
- bool CanReassociate =
- Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
+ bool CanReassociate = N->getFlags().hasAllowReassociation();
if (CanReassociate) {
SDValue FMA, E;
if (isFusedOp(N0) && N0.hasOneUse()) {
@@ -18087,8 +18086,7 @@ template <class MatchContextClass> SDValue DAGCombiner::visitFMA(SDNode *N) {
return matcher.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2);
}
- // FIXME: use fast math flags instead of Options.UnsafeFPMath
- // TODO: Finally migrate away from global TargetOptions.
+ // FIXME: Finally migrate away from global TargetOptions.
if (Options.AllowFPOpFusion == FPOpFusion::Fast ||
(Options.NoNaNsFPMath && Options.NoInfsFPMath) ||
(N->getFlags().hasNoNaNs() && N->getFlags().hasNoInfs())) {
@@ -18112,8 +18110,7 @@ template <class MatchContextClass> SDValue DAGCombiner::visitFMA(SDNode *N) {
!DAG.isConstantFPBuildVectorOrConstantFP(N1))
return matcher.getNode(ISD::FMA, DL, VT, N1, N0, N2);
- bool CanReassociate =
- Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
+ bool CanReassociate = N->getFlags().hasAllowReassociation();
if (CanReassociate) {
// (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
if (matcher.match(N2, ISD::FMUL) && N0 == N2.getOperand(0) &&
diff --git a/llvm/test/CodeGen/ARM/fp-fast.ll b/llvm/test/CodeGen/ARM/fp-fast.ll
index 7d95a5efe9052..6e1c783bfbe3e 100644
--- a/llvm/test/CodeGen/ARM/fp-fast.ll
+++ b/llvm/test/CodeGen/ARM/fp-fast.ll
@@ -1,5 +1,4 @@
-; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a9 -mattr=+vfp4 -enable-unsafe-fp-math %s -o - \
-; RUN: | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a9 -mattr=+vfp4 %s -o - | FileCheck %s
; CHECK: test1
define float @test1(float %x) {
@@ -7,7 +6,7 @@ define float @test1(float %x) {
; CHECK: vmul.f32
; CHECK-NOT: vfma
%t1 = fmul float %x, 3.0
- %t2 = call float @llvm.fma.f32(float %x, float 2.0, float %t1)
+ %t2 = call reassoc float @llvm.fma.f32(float %x, float 2.0, float %t1)
ret float %t2
}
@@ -17,7 +16,7 @@ define float @test2(float %x, float %y) {
; CHECK: vfma.f32
; CHECK-NOT: vmul
%t1 = fmul float %x, 3.0
- %t2 = call float @llvm.fma.f32(float %t1, float 2.0, float %y)
+ %t2 = call reassoc float @llvm.fma.f32(float %t1, float 2.0, float %y)
ret float %t2
}
@@ -44,7 +43,7 @@ define float @test5(float %x) {
; CHECK-NOT: vfma
; CHECK: vmul.f32
; CHECK-NOT: vfma
- %t2 = call float @llvm.fma.f32(float %x, float 2.0, float %x)
+ %t2 = call reassoc float @llvm.fma.f32(float %x, float 2.0, float %x)
ret float %t2
}
@@ -54,7 +53,7 @@ define float @test6(float %x) {
; CHECK: vmul.f32
; CHECK-NOT: vfma
%t1 = fsub float -0.0, %x
- %t2 = call float @llvm.fma.f32(float %x, float 5.0, float %t1)
+ %t2 = call reassoc float @llvm.fma.f32(float %x, float 5.0, float %t1)
ret float %t2
}
diff --git a/llvm/test/CodeGen/NVPTX/fma-assoc.ll b/llvm/test/CodeGen/NVPTX/fma-assoc.ll
index 1034c3eed3dc0..6693c9044ca2c 100644
--- a/llvm/test/CodeGen/NVPTX/fma-assoc.ll
+++ b/llvm/test/CodeGen/NVPTX/fma-assoc.ll
@@ -20,10 +20,10 @@ define ptx_device float @t1_f32(float %x, float %y, float %z,
; CHECK-UNSAFE-NEXT: st.param.b32 [func_retval0], %r7;
; CHECK-UNSAFE-NEXT: ret;
float %u, float %v) {
- %a = fmul float %x, %y
- %b = fmul float %u, %v
- %c = fadd float %a, %b
- %d = fadd float %c, %z
+ %a = fmul reassoc float %x, %y
+ %b = fmul reassoc float %u, %v
+ %c = fadd reassoc float %a, %b
+ %d = fadd reassoc float %c, %z
ret float %d
}
@@ -43,10 +43,10 @@ define ptx_device double @t1_f64(double %x, double %y, double %z,
; CHECK-UNSAFE-NEXT: st.param.b64 [func_retval0], %rd7;
; CHECK-UNSAFE-NEXT: ret;
double %u, double %v) {
- %a = fmul double %x, %y
- %b = fmul double %u, %v
- %c = fadd double %a, %b
- %d = fadd double %c, %z
+ %a = fmul reassoc double %x, %y
+ %b = fmul reassoc double %u, %v
+ %c = fadd reassoc double %a, %b
+ %d = fadd reassoc double %c, %z
ret double %d
}
diff --git a/llvm/test/CodeGen/X86/fma_patterns.ll b/llvm/test/CodeGen/X86/fma_patterns.ll
index 0ffcb8c46cef9..acdf783f61388 100644
--- a/llvm/test/CodeGen/X86/fma_patterns.ll
+++ b/llvm/test/CodeGen/X86/fma_patterns.ll
@@ -1672,9 +1672,9 @@ define <4 x float> @test_v4f32_fma_x_c1_fmul_x_c2(<4 x float> %x) #0 {
; AVX512: # %bb.0:
; AVX512-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512-NEXT: retq
- %m0 = fmul <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>
- %m1 = fmul <4 x float> %x, <float 4.0, float 3.0, float 2.0, float 1.0>
- %a = fadd <4 x float> %m0, %m1
+ %m0 = fmul contract reassoc <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>
+ %m1 = fmul contract reassoc <4 x float> %x, <float 4.0, float 3.0, float 2.0, float 1.0>
+ %a = fadd contract reassoc <4 x float> %m0, %m1
ret <4 x float> %a
}
@@ -1697,9 +1697,9 @@ define <4 x float> @test_v4f32_fma_fmul_x_c1_c2_y(<4 x float> %x, <4 x float> %y
; AVX512: # %bb.0:
; AVX512-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1
; AVX512-NEXT: retq
- %m0 = fmul <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>
- %m1 = fmul <4 x float> %m0, <float 4.0, float 3.0, float 2.0, float 1.0>
- %a = fadd <4 x float> %m1, %y
+ %m0 = fmul contract reassoc <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>
+ %m1 = fmul contract reassoc <4 x float> %m0, <float 4.0, float 3.0, float 2.0, float 1.0>
+ %a = fadd contract reassoc <4 x float> %m1, %y
ret <4 x float> %a
}
diff --git a/llvm/test/CodeGen/X86/fma_patterns_wide.ll b/llvm/test/CodeGen/X86/fma_patterns_wide.ll
index fe5ddca67470c..d910110467ee0 100644
--- a/llvm/test/CodeGen/X86/fma_patterns_wide.ll
+++ b/llvm/test/CodeGen/X86/fma_patterns_wide.ll
@@ -1053,9 +1053,9 @@ define <16 x float> @test_v16f32_fma_x_c1_fmul_x_c2(<16 x float> %x) #0 {
; AVX512: # %bb.0:
; AVX512-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; AVX512-NEXT: retq
- %m0 = fmul <16 x float> %x, <float 17.0, float 16.0, float 15.0, float 14.0, float 13.0, float 12.0, float 11.0, float 10.0, float 9.0, float 8.0, float 7.0, float 6.0, float 5.0, float 4.0, float 3.0, float 2.0>
- %m1 = fmul <16 x float> %x, <float 16.0, float 15.0, float 14.0, float 13.0, float 12.0, float 11.0, float 10.0, float 9.0, float 8.0, float 7.0, float 6.0, float 5.0, float 4.0, float 3.0, float 2.0, float 1.0>
- %a = fadd <16 x float> %m0, %m1
+ %m0 = fmul contract reassoc <16 x float> %x, <float 17.0, float 16.0, float 15.0, float 14.0, float 13.0, float 12.0, float 11.0, float 10.0, float 9.0, float 8.0, float 7.0, float 6.0, float 5.0, float 4.0, float 3.0, float 2.0>
+ %m1 = fmul contract reassoc <16 x float> %x, <float 16.0, float 15.0, float 14.0, float 13.0, float 12.0, float 11.0, float 10.0, float 9.0, float 8.0, float 7.0, float 6.0, float 5.0, float 4.0, float 3.0, float 2.0, float 1.0>
+ %a = fadd contract reassoc <16 x float> %m0, %m1
ret <16 x float> %a
}
@@ -1080,9 +1080,9 @@ define <16 x float> @test_v16f32_fma_fmul_x_c1_c2_y(<16 x float> %x, <16 x float
; AVX512: # %bb.0:
; AVX512-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * mem) + zmm1
; AVX512-NEXT: retq
- %m0 = fmul <16 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0>
- %m1 = fmul <16 x float> %m0, <float 16.0, float 15.0, float 14.0, float 13.0, float 12.0, float 11.0, float 10.0, float 9.0, float 8.0, float 7.0, float 6.0, float 5.0, float 4.0, float 3.0, float 2.0, float 1.0>
- %a = fadd <16 x float> %m1, %y
+ %m0 = fmul contract reassoc <16 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0>
+ %m1 = fmul contract reassoc <16 x float> %m0, <float 16.0, float 15.0, float 14.0, float 13.0, float 12.0, float 11.0, float 10.0, float 9.0, float 8.0, float 7.0, float 6.0, float 5.0, float 4.0, float 3.0, float 2.0, float 1.0>
+ %a = fadd contract reassoc <16 x float> %m1, %y
ret <16 x float> %a
}
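Summing up the FMA-related hunks: reassociation-based fusion now requires reassoc (plus contract for the fusion itself) on the nodes involved, rather than the global UnsafeFPMath setting. A hedged sketch (hypothetical function) in the style of the updated tests:

  define float @fma_chain(float %x, float %y, float %z, float %u, float %v) {
    ; (x*y) + (u*v) + z may be re-fused into nested FMAs only when the
    ; adds and multiplies carry reassoc (and contract enables the fusion)
    %a = fmul contract reassoc float %x, %y
    %b = fmul contract reassoc float %u, %v
    %c = fadd contract reassoc float %a, %b
    %d = fadd contract reassoc float %c, %z
    ret float %d
  }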