[llvm] Draft: [DAGCombiner] Remove all `UnsafeFPMath` references (PR #146295)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 30 00:28:01 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-arm
@llvm/pr-subscribers-backend-x86
Author: None (paperchalice)
<details>
<summary>Changes</summary>
This pull request removes all references to `UnsafeFPMath` in dag combiner.
- Set fast math flags for `fptrunc` dag node, which was introduced in #<!-- -->115894.
- Set fast math flags in some tests.
---
Patch is 32.56 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/146295.diff
10 Files Affected:
- (modified) llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (+10-15)
- (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (+5-1)
- (modified) llvm/test/CodeGen/AArch64/fdiv-combine.ll (+39-40)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.sin.ll (+17-19)
- (modified) llvm/test/CodeGen/ARM/fp-fast.ll (+5-6)
- (modified) llvm/test/CodeGen/NVPTX/fast-math.ll (+14-14)
- (modified) llvm/test/CodeGen/NVPTX/fma-assoc.ll (+8-8)
- (modified) llvm/test/CodeGen/X86/fma_patterns.ll (+6-6)
- (modified) llvm/test/CodeGen/X86/fma_patterns_wide.ll (+6-6)
- (modified) llvm/test/CodeGen/X86/fp-double-rounding.ll (+12-3)
``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 8554db0a1220c..02551c59406a5 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -16914,8 +16914,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
// fadd (G, (fma A, B, (fma (C, D, (fmul (E, F)))))) -->
// fma A, B, (fma C, D, fma (E, F, G)).
// This requires reassociation because it changes the order of operations.
- bool CanReassociate =
- Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
+ bool CanReassociate = N->getFlags().hasAllowReassociation();
if (CanReassociate) {
SDValue FMA, E;
if (isFusedOp(N0) && N0.hasOneUse()) {
@@ -17581,7 +17580,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
// If 'unsafe math' or reassoc and nsz, fold lots of things.
// TODO: break out portions of the transformations below for which Unsafe is
// considered and which do not require both nsz and reassoc
- if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
+ if ((Options.NoSignedZerosFPMath ||
(Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
AllowNewConst) {
// fadd (fadd x, c1), c2 -> fadd x, c1 + c2
@@ -17668,7 +17667,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
}
} // enable-unsafe-fp-math && AllowNewConst
- if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
+ if ((Options.NoSignedZerosFPMath ||
(Flags.hasAllowReassociation() && Flags.hasNoSignedZeros()))) {
// Fold fadd(vecreduce(x), vecreduce(y)) -> vecreduce(fadd(x, y))
if (SDValue SD = reassociateReduction(ISD::VECREDUCE_FADD, ISD::FADD, DL,
@@ -17771,7 +17770,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
}
}
- if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
+ if ((Options.NoSignedZerosFPMath ||
(Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
N1.getOpcode() == ISD::FADD) {
// X - (X + Y) -> -Y
@@ -17911,7 +17910,6 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
EVT VT = N->getValueType(0);
SDLoc DL(N);
- const TargetOptions &Options = DAG.getTarget().Options;
const SDNodeFlags Flags = N->getFlags();
SelectionDAG::FlagInserter FlagsInserter(DAG, N);
@@ -17935,7 +17933,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
- if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
+ if (Flags.hasAllowReassociation()) {
// fmul (fmul X, C1), C2 -> fmul X, C1 * C2
if (DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
N0.getOpcode() == ISD::FMUL) {
@@ -18088,8 +18086,7 @@ template <class MatchContextClass> SDValue DAGCombiner::visitFMA(SDNode *N) {
return matcher.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2);
}
- // FIXME: use fast math flags instead of Options.UnsafeFPMath
- // TODO: Finally migrate away from global TargetOptions.
+ // FIXME: Finally migrate away from global TargetOptions.
if (Options.AllowFPOpFusion == FPOpFusion::Fast ||
(Options.NoNaNsFPMath && Options.NoInfsFPMath) ||
(N->getFlags().hasNoNaNs() && N->getFlags().hasNoInfs())) {
@@ -18113,8 +18110,7 @@ template <class MatchContextClass> SDValue DAGCombiner::visitFMA(SDNode *N) {
!DAG.isConstantFPBuildVectorOrConstantFP(N1))
return matcher.getNode(ISD::FMA, DL, VT, N1, N0, N2);
- bool CanReassociate =
- Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
+ bool CanReassociate = N->getFlags().hasAllowReassociation();
if (CanReassociate) {
// (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
if (matcher.match(N2, ISD::FMUL) && N0 == N2.getOperand(0) &&
@@ -18209,9 +18205,8 @@ SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
// TODO: Limit this transform based on optsize/minsize - it always creates at
// least 1 extra instruction. But the perf win may be substantial enough
// that only minsize should restrict this.
- bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
const SDNodeFlags Flags = N->getFlags();
- if (LegalDAG || (!UnsafeMath && !Flags.hasAllowReciprocal()))
+ if (LegalDAG || !Flags.hasAllowReciprocal())
return SDValue();
// Skip if current node is a reciprocal/fneg-reciprocal.
@@ -18248,7 +18243,7 @@ SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
// This division is eligible for optimization only if global unsafe math
// is enabled or if this division allows reciprocal formation.
- if (UnsafeMath || U->getFlags().hasAllowReciprocal())
+ if (U->getFlags().hasAllowReciprocal())
Users.insert(U);
}
}
@@ -18861,7 +18856,7 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
// single-step fp_round we want to fold to.
// In other words, double rounding isn't the same as rounding.
// Also, this is a value preserving truncation iff both fp_round's are.
- if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc)
+ if (N->getFlags().hasAllowContract() || N0IsTrunc)
return DAG.getNode(
ISD::FP_ROUND, DL, VT, N0.getOperand(0),
DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL, /*isTarget=*/true));
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 04d6fd5f48cc3..b32720ba2a853 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3908,11 +3908,15 @@ void SelectionDAGBuilder::visitFPTrunc(const User &I) {
// FPTrunc is never a no-op cast, no need to check
SDValue N = getValue(I.getOperand(0));
SDLoc dl = getCurSDLoc();
+ SDNodeFlags Flags;
+ if (auto *FPOp = dyn_cast<FPMathOperator>(&I))
+ Flags.copyFMF(*FPOp);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
setValue(&I, DAG.getNode(ISD::FP_ROUND, dl, DestVT, N,
DAG.getTargetConstant(
- 0, dl, TLI.getPointerTy(DAG.getDataLayout()))));
+ 0, dl, TLI.getPointerTy(DAG.getDataLayout())),
+ Flags));
}
void SelectionDAGBuilder::visitFPExt(const User &I) {
diff --git a/llvm/test/CodeGen/AArch64/fdiv-combine.ll b/llvm/test/CodeGen/AArch64/fdiv-combine.ll
index d8f7f0a306684..91bb8ac714908 100644
--- a/llvm/test/CodeGen/AArch64/fdiv-combine.ll
+++ b/llvm/test/CodeGen/AArch64/fdiv-combine.ll
@@ -11,7 +11,7 @@
; a / D; b / D; c / D;
; =>
; recip = 1.0 / D; a * recip; b * recip; c * recip;
-define void @three_fdiv_float(float %D, float %a, float %b, float %c) #0 {
+define void @three_fdiv_float(float %D, float %a, float %b, float %c) {
; CHECK-SD-LABEL: three_fdiv_float:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: fmov s4, #1.00000000
@@ -28,14 +28,14 @@ define void @three_fdiv_float(float %D, float %a, float %b, float %c) #0 {
; CHECK-GI-NEXT: fdiv s2, s3, s0
; CHECK-GI-NEXT: fmov s0, s4
; CHECK-GI-NEXT: b foo_3f
- %div = fdiv float %a, %D
- %div1 = fdiv float %b, %D
- %div2 = fdiv float %c, %D
+ %div = fdiv arcp float %a, %D
+ %div1 = fdiv arcp float %b, %D
+ %div2 = fdiv arcp float %c, %D
tail call void @foo_3f(float %div, float %div1, float %div2)
ret void
}
-define void @three_fdiv_double(double %D, double %a, double %b, double %c) #0 {
+define void @three_fdiv_double(double %D, double %a, double %b, double %c) {
; CHECK-SD-LABEL: three_fdiv_double:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: fmov d4, #1.00000000
@@ -52,14 +52,14 @@ define void @three_fdiv_double(double %D, double %a, double %b, double %c) #0 {
; CHECK-GI-NEXT: fdiv d2, d3, d0
; CHECK-GI-NEXT: fmov d0, d4
; CHECK-GI-NEXT: b foo_3d
- %div = fdiv double %a, %D
- %div1 = fdiv double %b, %D
- %div2 = fdiv double %c, %D
+ %div = fdiv arcp double %a, %D
+ %div1 = fdiv arcp double %b, %D
+ %div2 = fdiv arcp double %c, %D
tail call void @foo_3d(double %div, double %div1, double %div2)
ret void
}
-define void @three_fdiv_4xfloat(<4 x float> %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) #0 {
+define void @three_fdiv_4xfloat(<4 x float> %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-SD-LABEL: three_fdiv_4xfloat:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: fmov v4.4s, #1.00000000
@@ -76,14 +76,14 @@ define void @three_fdiv_4xfloat(<4 x float> %D, <4 x float> %a, <4 x float> %b,
; CHECK-GI-NEXT: fdiv v2.4s, v3.4s, v0.4s
; CHECK-GI-NEXT: mov v0.16b, v4.16b
; CHECK-GI-NEXT: b foo_3_4xf
- %div = fdiv <4 x float> %a, %D
- %div1 = fdiv <4 x float> %b, %D
- %div2 = fdiv <4 x float> %c, %D
+ %div = fdiv arcp <4 x float> %a, %D
+ %div1 = fdiv arcp <4 x float> %b, %D
+ %div2 = fdiv arcp <4 x float> %c, %D
tail call void @foo_3_4xf(<4 x float> %div, <4 x float> %div1, <4 x float> %div2)
ret void
}
-define void @three_fdiv_2xdouble(<2 x double> %D, <2 x double> %a, <2 x double> %b, <2 x double> %c) #0 {
+define void @three_fdiv_2xdouble(<2 x double> %D, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-SD-LABEL: three_fdiv_2xdouble:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: fmov v4.2d, #1.00000000
@@ -100,42 +100,42 @@ define void @three_fdiv_2xdouble(<2 x double> %D, <2 x double> %a, <2 x double>
; CHECK-GI-NEXT: fdiv v2.2d, v3.2d, v0.2d
; CHECK-GI-NEXT: mov v0.16b, v4.16b
; CHECK-GI-NEXT: b foo_3_2xd
- %div = fdiv <2 x double> %a, %D
- %div1 = fdiv <2 x double> %b, %D
- %div2 = fdiv <2 x double> %c, %D
+ %div = fdiv arcp <2 x double> %a, %D
+ %div1 = fdiv arcp <2 x double> %b, %D
+ %div2 = fdiv arcp <2 x double> %c, %D
tail call void @foo_3_2xd(<2 x double> %div, <2 x double> %div1, <2 x double> %div2)
ret void
}
; Following test cases check we never combine two FDIVs if neither of them
; calculates a reciprocal.
-define void @two_fdiv_float(float %D, float %a, float %b) #0 {
+define void @two_fdiv_float(float %D, float %a, float %b) {
; CHECK-LABEL: two_fdiv_float:
; CHECK: // %bb.0:
; CHECK-NEXT: fdiv s3, s1, s0
; CHECK-NEXT: fdiv s1, s2, s0
; CHECK-NEXT: fmov s0, s3
; CHECK-NEXT: b foo_2f
- %div = fdiv float %a, %D
- %div1 = fdiv float %b, %D
+ %div = fdiv arcp float %a, %D
+ %div1 = fdiv arcp float %b, %D
tail call void @foo_2f(float %div, float %div1)
ret void
}
-define void @two_fdiv_double(double %D, double %a, double %b) #0 {
+define void @two_fdiv_double(double %D, double %a, double %b) {
; CHECK-LABEL: two_fdiv_double:
; CHECK: // %bb.0:
; CHECK-NEXT: fdiv d3, d1, d0
; CHECK-NEXT: fdiv d1, d2, d0
; CHECK-NEXT: fmov d0, d3
; CHECK-NEXT: b foo_2d
- %div = fdiv double %a, %D
- %div1 = fdiv double %b, %D
+ %div = fdiv arcp double %a, %D
+ %div1 = fdiv arcp double %b, %D
tail call void @foo_2d(double %div, double %div1)
ret void
}
-define void @splat_three_fdiv_4xfloat(float %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) #0 {
+define void @splat_three_fdiv_4xfloat(float %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-SD-LABEL: splat_three_fdiv_4xfloat:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
@@ -157,14 +157,14 @@ define void @splat_three_fdiv_4xfloat(float %D, <4 x float> %a, <4 x float> %b,
; CHECK-GI-NEXT: b foo_3_4xf
%D.ins = insertelement <4 x float> poison, float %D, i64 0
%splat = shufflevector <4 x float> %D.ins, <4 x float> poison, <4 x i32> zeroinitializer
- %div = fdiv <4 x float> %a, %splat
- %div1 = fdiv <4 x float> %b, %splat
- %div2 = fdiv <4 x float> %c, %splat
+ %div = fdiv arcp <4 x float> %a, %splat
+ %div1 = fdiv arcp <4 x float> %b, %splat
+ %div2 = fdiv arcp <4 x float> %c, %splat
tail call void @foo_3_4xf(<4 x float> %div, <4 x float> %div1, <4 x float> %div2)
ret void
}
-define <4 x float> @splat_fdiv_v4f32(float %D, <4 x float> %a) #1 {
+define <4 x float> @splat_fdiv_v4f32(float %D, <4 x float> %a) #0 {
; CHECK-SD-LABEL: splat_fdiv_v4f32:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
@@ -183,11 +183,11 @@ define <4 x float> @splat_fdiv_v4f32(float %D, <4 x float> %a) #1 {
entry:
%D.ins = insertelement <4 x float> poison, float %D, i64 0
%splat = shufflevector <4 x float> %D.ins, <4 x float> poison, <4 x i32> zeroinitializer
- %div = fdiv <4 x float> %a, %splat
+ %div = fdiv arcp <4 x float> %a, %splat
ret <4 x float> %div
}
-define <vscale x 4 x float> @splat_fdiv_nxv4f32(float %D, <vscale x 4 x float> %a) #1 {
+define <vscale x 4 x float> @splat_fdiv_nxv4f32(float %D, <vscale x 4 x float> %a) #0 {
; CHECK-LABEL: splat_fdiv_nxv4f32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmov s2, #1.00000000
@@ -198,11 +198,11 @@ define <vscale x 4 x float> @splat_fdiv_nxv4f32(float %D, <vscale x 4 x float> %
entry:
%D.ins = insertelement <vscale x 4 x float> poison, float %D, i64 0
%splat = shufflevector <vscale x 4 x float> %D.ins, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
- %div = fdiv <vscale x 4 x float> %a, %splat
+ %div = fdiv arcp <vscale x 4 x float> %a, %splat
ret <vscale x 4 x float> %div
}
-define void @splat_three_fdiv_nxv4f32(float %D, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) #1 {
+define void @splat_three_fdiv_nxv4f32(float %D, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) #0 {
; CHECK-LABEL: splat_three_fdiv_nxv4f32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmov s4, #1.00000000
@@ -215,14 +215,14 @@ define void @splat_three_fdiv_nxv4f32(float %D, <vscale x 4 x float> %a, <vscale
entry:
%D.ins = insertelement <vscale x 4 x float> poison, float %D, i64 0
%splat = shufflevector <vscale x 4 x float> %D.ins, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
- %div = fdiv <vscale x 4 x float> %a, %splat
- %div1 = fdiv <vscale x 4 x float> %b, %splat
- %div2 = fdiv <vscale x 4 x float> %c, %splat
+ %div = fdiv arcp <vscale x 4 x float> %a, %splat
+ %div1 = fdiv arcp <vscale x 4 x float> %b, %splat
+ %div2 = fdiv arcp <vscale x 4 x float> %c, %splat
tail call void @foo_3_nxv4f32(<vscale x 4 x float> %div, <vscale x 4 x float> %div1, <vscale x 4 x float> %div2)
ret void
}
-define <vscale x 2 x double> @splat_fdiv_nxv2f64(double %D, <vscale x 2 x double> %a) #1 {
+define <vscale x 2 x double> @splat_fdiv_nxv2f64(double %D, <vscale x 2 x double> %a) #0 {
; CHECK-LABEL: splat_fdiv_nxv2f64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
@@ -237,7 +237,7 @@ entry:
ret <vscale x 2 x double> %div
}
-define void @splat_two_fdiv_nxv2f64(double %D, <vscale x 2 x double> %a, <vscale x 2 x double> %b) #1 {
+define void @splat_two_fdiv_nxv2f64(double %D, <vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
; CHECK-LABEL: splat_two_fdiv_nxv2f64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmov d3, #1.00000000
@@ -249,8 +249,8 @@ define void @splat_two_fdiv_nxv2f64(double %D, <vscale x 2 x double> %a, <vscale
entry:
%D.ins = insertelement <vscale x 2 x double> poison, double %D, i64 0
%splat = shufflevector <vscale x 2 x double> %D.ins, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
- %div = fdiv <vscale x 2 x double> %a, %splat
- %div1 = fdiv <vscale x 2 x double> %b, %splat
+ %div = fdiv arcp <vscale x 2 x double> %a, %splat
+ %div1 = fdiv arcp <vscale x 2 x double> %b, %splat
tail call void @foo_2_nxv2f64(<vscale x 2 x double> %div, <vscale x 2 x double> %div1)
ret void
}
@@ -264,5 +264,4 @@ declare void @foo_2d(double, double)
declare void @foo_3_nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
declare void @foo_2_nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
-attributes #0 = { "unsafe-fp-math"="true" }
-attributes #1 = { "unsafe-fp-math"="true" "target-features"="+sve" }
+attributes #0 = { "target-features"="+sve" }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.sin.ll b/llvm/test/CodeGen/AMDGPU/llvm.sin.ll
index 576ed270183f6..58ebf2aafa5a8 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.sin.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.sin.ll
@@ -16,7 +16,7 @@
; GFX9-NOT: v_fract_f32
; GCN: v_sin_f32
; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @sin_f32(ptr addrspace(1) %out, float %x) #1 {
+define amdgpu_kernel void @sin_f32(ptr addrspace(1) %out, float %x) {
%sin = call float @llvm.sin.f32(float %x)
store float %sin, ptr addrspace(1) %out
ret void
@@ -29,7 +29,7 @@ define amdgpu_kernel void @sin_f32(ptr addrspace(1) %out, float %x) #1 {
; GFX9-NOT: v_fract_f32
; GCN: v_sin_f32
; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @safe_sin_3x_f32(ptr addrspace(1) %out, float %x) #1 {
+define amdgpu_kernel void @safe_sin_3x_f32(ptr addrspace(1) %out, float %x) {
%y = fmul float 3.0, %x
%sin = call float @llvm.sin.f32(float %y)
store float %sin, ptr addrspace(1) %out
@@ -44,9 +44,9 @@ define amdgpu_kernel void @safe_sin_3x_f32(ptr addrspace(1) %out, float %x) #1 {
; GFX9-NOT: v_fract_f32
; GCN: v_sin_f32
; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @unsafe_sin_3x_f32(ptr addrspace(1) %out, float %x) #2 {
- %y = fmul float 3.0, %x
- %sin = call float @llvm.sin.f32(float %y)
+define amdgpu_kernel void @unsafe_sin_3x_f32(ptr addrspace(1) %out, float %x) {
+ %y = fmul reassoc float 3.0, %x
+ %sin = call reassoc float @llvm.sin.f32(float %y)
store float %sin, ptr addrspace(1) %out
ret void
}
@@ -59,7 +59,7 @@ define amdgpu_kernel void @unsafe_sin_3x_f32(ptr addrspace(1) %out, float %x) #2
; GFX9-NOT: v_fract_f32
; GCN: v_sin_f32
; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @fmf_sin_3x_f32(ptr addrspace(1) %out, float %x) #1 {
+define amdgpu_kernel void @fmf_sin_3x_f32(ptr addrspace(1) %out, float %x) {
%y = fmul reassoc float 3.0, %x
%sin = call reassoc float @llvm.sin.f32(float %y)
store float %sin, ptr addrspace(1) %out
@@ -73,7 +73,7 @@ define amdgpu_kernel void @fmf_sin_3x_f32(ptr addrspace(1) %out, float %x) #1 {
; GFX9-NOT: v_fract_f32
; GCN: v_sin_f32
; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @safe_sin_2x_f32(ptr addrspace(1) %out, float %x) #1 {
+define amdgpu_kernel void @safe_sin_2x_f32(ptr addrspace(1) %out, float %x) {
%y = fmul float 2.0, %x
%sin = call float @llvm.sin.f32(float %y)
store float %sin, ptr addrspace(1) %out
@@ -88,9 +88,9 @@ define amdgpu_kernel void @safe_sin_2x_f32(ptr addrspace(1) %out, float %x) #1 {
; GFX9-NOT: v_fract_f32
; GCN: v_sin_f32
; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @unsafe_sin_2x_f32(ptr addrspace(1) %out, float %x) #2 {
- %y = fmul float 2.0, %x
- %sin = call float @llvm.sin.f32(float %y)
+define amdgpu_kernel void @unsafe_sin_2x_f32(ptr addrspace(1) %out, float %x) {
+ %y = fmul reassoc float 2.0, %x
+ %sin = call reassoc float @llvm.sin.f32(float %y)
store float %sin, ptr addrspace(1) %out
ret void
}
@@ -103,7 +103,7 @@ define amdgpu_kernel void @unsafe_sin_2x_f32(ptr addrspace(1) %out, float %x) #2
; GFX9-NOT: v_fract_f32
; GCN: v_sin_f32
; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @fmf_sin_2x_f32(ptr addrspace(1) %out, float %x) #1 {
+define amdgpu_kernel void @fmf_sin_2x_f32(ptr addrspace(1) %out, float %x) {
%y = fmul reassoc float 2.0, %x
%sin = call reassoc float @llvm.sin.f32(float %y)
store float %sin, ptr addrspace(1) %out
@@ -117,7 +117,7 @@ define amdgpu_kernel void @fmf_sin_2x_f32(ptr addrspace(1) %out, float %x) #1 {
; GFX9-NOT: v_fract_f32
; GCN: v_sin_f32
; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @safe_sin_cancel_f32(ptr addrspace(1) %out, float %x) #1 {
+define amdgpu_kernel void @safe_sin_cancel_f32(ptr addrspace(1) %out, float %x) {
%y = fmul float 0x401921FB60000000, %x
%sin = call float @llvm.sin.f32(float %y)
store float %sin, ptr addrspace(1) %out
@@ -131,9 +131,9 @@ define amdgpu_kernel void @safe_sin_cancel_f32(ptr addrspace(1) %out, float %x)
; GFX9-NOT: v_fract_f32
; GCN: v_sin_f32
; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @unsafe_sin_cancel_f32(ptr addrspace(1) %out, float %x) #2 {
- %y = fmul float 0x401921FB60000000, %x
- %sin = call float ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/146295
More information about the llvm-commits
mailing list