[llvm] WIP: [AMDGPU] Remove `UnsafeFPMath` (PR #151079)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 29 01:15:31 PDT 2025
https://github.com/paperchalice updated https://github.com/llvm/llvm-project/pull/151079
>From 29001f0f4dd552ef154d66e29c04a425da5abdf5 Mon Sep 17 00:00:00 2001
From: PaperChalice <liujunchang97 at outlook.com>
Date: Tue, 29 Jul 2025 12:44:26 +0800
Subject: [PATCH 1/3] Remove `UnsafeFPMath` in `AMDGPUCodeGenPrepare`
---
.../Target/AMDGPU/AMDGPUCodeGenPrepare.cpp | 15 +---
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll | 16 ++--
llvm/test/CodeGen/AMDGPU/rsq.f32.ll | 90 +++++++++----------
3 files changed, 57 insertions(+), 64 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index 5f1983791cfae..7e2c67d22cf6e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -89,10 +89,6 @@ static cl::opt<bool> DisableFDivExpand(
cl::ReallyHidden,
cl::init(false));
-static bool hasUnsafeFPMath(const Function &F) {
- return F.getFnAttribute("unsafe-fp-math").getValueAsBool();
-}
-
class AMDGPUCodeGenPrepareImpl
: public InstVisitor<AMDGPUCodeGenPrepareImpl, bool> {
public:
@@ -104,7 +100,6 @@ class AMDGPUCodeGenPrepareImpl
const DominatorTree *DT;
const UniformityInfo &UA;
const DataLayout &DL;
- const bool HasUnsafeFPMath;
const bool HasFP32DenormalFlush;
bool FlowChanged = false;
mutable Function *SqrtF32 = nullptr;
@@ -117,7 +112,6 @@ class AMDGPUCodeGenPrepareImpl
const DominatorTree *DT, const UniformityInfo &UA)
: F(F), ST(TM.getSubtarget<GCNSubtarget>(F)), TM(TM), TLI(TLI), AC(AC),
DT(DT), UA(UA), DL(F.getDataLayout()),
- HasUnsafeFPMath(hasUnsafeFPMath(F)),
HasFP32DenormalFlush(SIModeRegisterDefaults(F, ST).FP32Denormals ==
DenormalMode::getPreserveSign()) {}
@@ -637,8 +631,7 @@ bool AMDGPUCodeGenPrepareImpl::canOptimizeWithRsq(const FPMathOperator *SqrtOp,
return false;
// v_rsq_f32 gives 1ulp
- return SqrtFMF.approxFunc() || HasUnsafeFPMath ||
- SqrtOp->getFPAccuracy() >= 1.0f;
+ return SqrtFMF.approxFunc() || SqrtOp->getFPAccuracy() >= 1.0f;
}
Value *AMDGPUCodeGenPrepareImpl::optimizeWithRsq(
@@ -664,7 +657,7 @@ Value *AMDGPUCodeGenPrepareImpl::optimizeWithRsq(
IRBuilder<>::FastMathFlagGuard Guard(Builder);
Builder.setFastMathFlags(DivFMF | SqrtFMF);
- if ((DivFMF.approxFunc() && SqrtFMF.approxFunc()) || HasUnsafeFPMath ||
+ if ((DivFMF.approxFunc() && SqrtFMF.approxFunc()) ||
canIgnoreDenormalInput(Den, CtxI)) {
Value *Result = Builder.CreateUnaryIntrinsic(Intrinsic::amdgcn_rsq, Den);
// -1.0 / sqrt(x) -> fneg(rsq(x))
@@ -852,7 +845,7 @@ bool AMDGPUCodeGenPrepareImpl::visitFDiv(BinaryOperator &FDiv) {
// expansion of afn to codegen. The current interpretation is so aggressive we
// don't need any pre-consideration here when we have better information. A
// more conservative interpretation could use handling here.
- const bool AllowInaccurateRcp = HasUnsafeFPMath || DivFMF.approxFunc();
+ const bool AllowInaccurateRcp = DivFMF.approxFunc();
if (!RsqOp && AllowInaccurateRcp)
return false;
@@ -2026,7 +2019,7 @@ bool AMDGPUCodeGenPrepareImpl::visitSqrt(IntrinsicInst &Sqrt) {
// We're trying to handle the fast-but-not-that-fast case only. The lowering
// of fast llvm.sqrt will give the raw instruction anyway.
- if (SqrtFMF.approxFunc() || HasUnsafeFPMath)
+ if (SqrtFMF.approxFunc())
return false;
const float ReqdAccuracy = FPOp->getFPAccuracy();
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll
index 425a8530afa97..477f0a610feec 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll
@@ -51,7 +51,7 @@ define amdgpu_kernel void @safe_no_fp32_denormals_rcp_f32(ptr addrspace(1) %out,
; SI-NOT: [[RESULT]]
; SI: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @safe_f32_denormals_rcp_pat_f32(ptr addrspace(1) %out, float %src) #4 {
- %rcp = fdiv float 1.0, %src, !fpmath !0
+ %rcp = fdiv afn float 1.0, %src, !fpmath !0
store float %rcp, ptr addrspace(1) %out, align 4
ret void
}
@@ -105,8 +105,8 @@ define amdgpu_kernel void @safe_rsq_rcp_pat_amdgcn_sqrt_f32_nocontract(ptr addrs
; SI: v_sqrt_f32_e32
; SI: v_rcp_f32_e32
define amdgpu_kernel void @unsafe_rsq_rcp_pat_f32(ptr addrspace(1) %out, float %src) #2 {
- %sqrt = call float @llvm.sqrt.f32(float %src)
- %rcp = call float @llvm.amdgcn.rcp.f32(float %sqrt)
+ %sqrt = call afn float @llvm.sqrt.f32(float %src)
+ %rcp = call afn float @llvm.amdgcn.rcp.f32(float %sqrt)
store float %rcp, ptr addrspace(1) %out, align 4
ret void
}
@@ -148,7 +148,7 @@ define amdgpu_kernel void @rcp_pat_f64(ptr addrspace(1) %out, double %src) #1 {
; SI: v_fma_f64
; SI: v_fma_f64
define amdgpu_kernel void @unsafe_rcp_pat_f64(ptr addrspace(1) %out, double %src) #2 {
- %rcp = fdiv double 1.0, %src
+ %rcp = fdiv afn double 1.0, %src
store double %rcp, ptr addrspace(1) %out, align 8
ret void
}
@@ -214,9 +214,9 @@ define amdgpu_kernel void @unsafe_amdgcn_sqrt_rsq_rcp_pat_f64(ptr addrspace(1) %
}
attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind "unsafe-fp-math"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
-attributes #2 = { nounwind "unsafe-fp-math"="true" "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
-attributes #3 = { nounwind "unsafe-fp-math"="false" "denormal-fp-math-f32"="ieee,ieee" }
-attributes #4 = { nounwind "unsafe-fp-math"="true" "denormal-fp-math-f32"="ieee,ieee" }
+attributes #1 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
+attributes #2 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
+attributes #3 = { nounwind "denormal-fp-math-f32"="ieee,ieee" }
+attributes #4 = { nounwind "denormal-fp-math-f32"="ieee,ieee" }
!0 = !{float 2.500000e+00}
diff --git a/llvm/test/CodeGen/AMDGPU/rsq.f32.ll b/llvm/test/CodeGen/AMDGPU/rsq.f32.ll
index f7e0388561104..f967e951b27a4 100644
--- a/llvm/test/CodeGen/AMDGPU/rsq.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/rsq.f32.ll
@@ -1,10 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
-; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tahiti -denormal-fp-math-f32=preserve-sign -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=GCN-DAZ,GCN-DAZ-UNSAFE,SI-DAZ-UNSAFE %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tahiti -denormal-fp-math-f32=ieee -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=GCN-IEEE,GCN-IEEE-UNSAFE,SI-IEEE-UNSAFE %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tahiti -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=GCN-DAZ,GCN-DAZ-UNSAFE,SI-DAZ-UNSAFE %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tahiti -denormal-fp-math-f32=ieee < %s | FileCheck -check-prefixes=GCN-IEEE,GCN-IEEE-UNSAFE,SI-IEEE-UNSAFE %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=hawaii -denormal-fp-math-f32=preserve-sign -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=GCN-DAZ,GCN-DAZ-UNSAFE,CI-DAZ-UNSAFE %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=hawaii -denormal-fp-math-f32=ieee -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=GCN-IEEE,GCN-IEEE-UNSAFE,CI-IEEE-UNSAFE %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=hawaii -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=GCN-DAZ,GCN-DAZ-UNSAFE,CI-DAZ-UNSAFE %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=hawaii -denormal-fp-math-f32=ieee < %s | FileCheck -check-prefixes=GCN-IEEE,GCN-IEEE-UNSAFE,CI-IEEE-UNSAFE %s
declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
@@ -65,8 +65,8 @@ define amdgpu_kernel void @rsq_f32(ptr addrspace(1) noalias %out, ptr addrspace(
; GCN-UNSAFE-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GCN-UNSAFE-NEXT: s_endpgm
%val = load float, ptr addrspace(1) %in, align 4
- %sqrt = call contract float @llvm.sqrt.f32(float %val) nounwind readnone
- %div = fdiv contract float 1.0, %sqrt, !fpmath !0
+ %sqrt = call afn contract float @llvm.sqrt.f32(float %val) nounwind readnone
+ %div = fdiv afn contract float 1.0, %sqrt, !fpmath !0
store float %div, ptr addrspace(1) %out, align 4
ret void
}
@@ -103,8 +103,8 @@ define amdgpu_kernel void @rsq_f32_sgpr(ptr addrspace(1) noalias %out, float %va
; GCN-UNSAFE-NEXT: s_mov_b32 s2, -1
; GCN-UNSAFE-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GCN-UNSAFE-NEXT: s_endpgm
- %sqrt = call contract float @llvm.sqrt.f32(float %val) nounwind readnone
- %div = fdiv contract float 1.0, %sqrt, !fpmath !0
+ %sqrt = call afn contract float @llvm.sqrt.f32(float %val) nounwind readnone
+ %div = fdiv afn contract float 1.0, %sqrt, !fpmath !0
store float %div, ptr addrspace(1) %out, align 4
ret void
}
@@ -196,7 +196,7 @@ define amdgpu_kernel void @rsqrt_fmul(ptr addrspace(1) %out, ptr addrspace(1) %i
%x = call contract float @llvm.sqrt.f32(float %a)
%y = fmul contract float %x, %b
- %z = fdiv arcp contract float %c, %y
+ %z = fdiv arcp afn contract float %c, %y
store float %z, ptr addrspace(1) %out.gep
ret void
}
@@ -258,8 +258,8 @@ define amdgpu_kernel void @neg_rsq_f32(ptr addrspace(1) noalias %out, ptr addrsp
; GCN-UNSAFE-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GCN-UNSAFE-NEXT: s_endpgm
%val = load float, ptr addrspace(1) %in, align 4
- %sqrt = call contract float @llvm.sqrt.f32(float %val)
- %div = fdiv contract float -1.0, %sqrt, !fpmath !0
+ %sqrt = call afn contract float @llvm.sqrt.f32(float %val)
+ %div = fdiv afn contract float -1.0, %sqrt, !fpmath !0
store float %div, ptr addrspace(1) %out, align 4
ret void
}
@@ -322,8 +322,8 @@ define amdgpu_kernel void @neg_rsq_neg_f32(ptr addrspace(1) noalias %out, ptr ad
; GCN-UNSAFE-NEXT: s_endpgm
%val = load float, ptr addrspace(1) %in, align 4
%val.fneg = fneg float %val
- %sqrt = call contract float @llvm.sqrt.f32(float %val.fneg)
- %div = fdiv contract float -1.0, %sqrt, !fpmath !0
+ %sqrt = call afn contract float @llvm.sqrt.f32(float %val.fneg)
+ %div = fdiv afn contract float -1.0, %sqrt, !fpmath !0
store float %div, ptr addrspace(1) %out, align 4
ret void
}
@@ -343,8 +343,8 @@ define float @v_neg_rsq_neg_f32(float %val) {
; GCN-IEEE-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
; GCN-IEEE-NEXT: s_setpc_b64 s[30:31]
%val.fneg = fneg float %val
- %sqrt = call contract float @llvm.sqrt.f32(float %val.fneg)
- %div = fdiv contract float -1.0, %sqrt, !fpmath !0
+ %sqrt = call afn contract float @llvm.sqrt.f32(float %val.fneg)
+ %div = fdiv afn contract float -1.0, %sqrt, !fpmath !0
ret float %div
}
@@ -367,8 +367,8 @@ define <2 x float> @v_neg_rsq_neg_v2f32(<2 x float> %val) {
; GCN-IEEE-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
; GCN-IEEE-NEXT: s_setpc_b64 s[30:31]
%val.fneg = fneg <2 x float> %val
- %sqrt = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> %val.fneg)
- %div = fdiv contract <2 x float> <float -1.0, float -1.0>, %sqrt, !fpmath !0
+ %sqrt = call afn contract <2 x float> @llvm.sqrt.v2f32(<2 x float> %val.fneg)
+ %div = fdiv afn contract <2 x float> <float -1.0, float -1.0>, %sqrt, !fpmath !0
ret <2 x float> %div
}
@@ -387,8 +387,8 @@ define float @v_neg_rsq_neg_f32_foldable_user(float %val0, float %val1) {
; GCN-IEEE-NEXT: v_mul_f32_e64 v0, -v0, v1
; GCN-IEEE-NEXT: s_setpc_b64 s[30:31]
%val0.neg = fneg float %val0
- %sqrt = call contract float @llvm.sqrt.f32(float %val0.neg)
- %div = fdiv contract float -1.0, %sqrt, !fpmath !0
+ %sqrt = call afn contract float @llvm.sqrt.f32(float %val0.neg)
+ %div = fdiv afn contract float -1.0, %sqrt, !fpmath !0
%user = fmul contract float %div, %val1
ret float %user
}
@@ -412,8 +412,8 @@ define <2 x float> @v_neg_rsq_neg_v2f32_foldable_user(<2 x float> %val0, <2 x fl
; GCN-IEEE-NEXT: v_mul_f32_e64 v1, -v1, v3
; GCN-IEEE-NEXT: s_setpc_b64 s[30:31]
%val0.fneg = fneg <2 x float> %val0
- %sqrt = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> %val0.fneg)
- %div = fdiv contract <2 x float> <float -1.0, float -1.0>, %sqrt, !fpmath !0
+ %sqrt = call afn contract <2 x float> @llvm.sqrt.v2f32(<2 x float> %val0.fneg)
+ %div = fdiv afn contract <2 x float> <float -1.0, float -1.0>, %sqrt, !fpmath !0
%user = fmul contract <2 x float> %div, %val1
ret <2 x float> %user
}
@@ -432,8 +432,8 @@ define float @v_neg_rsq_f32(float %val) {
; GCN-IEEE-NEXT: v_rsq_f32_e32 v0, v0
; GCN-IEEE-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
; GCN-IEEE-NEXT: s_setpc_b64 s[30:31]
- %sqrt = call contract float @llvm.sqrt.f32(float %val)
- %div = fdiv contract float -1.0, %sqrt, !fpmath !0
+ %sqrt = call afn contract float @llvm.sqrt.f32(float %val)
+ %div = fdiv afn contract float -1.0, %sqrt, !fpmath !0
ret float %div
}
@@ -455,8 +455,8 @@ define <2 x float> @v_neg_rsq_v2f32(<2 x float> %val) {
; GCN-IEEE-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
; GCN-IEEE-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
; GCN-IEEE-NEXT: s_setpc_b64 s[30:31]
- %sqrt = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> %val)
- %div = fdiv contract <2 x float> <float -1.0, float -1.0>, %sqrt, !fpmath !0
+ %sqrt = call afn contract <2 x float> @llvm.sqrt.v2f32(<2 x float> %val)
+ %div = fdiv afn contract <2 x float> <float -1.0, float -1.0>, %sqrt, !fpmath !0
ret <2 x float> %div
}
@@ -474,8 +474,8 @@ define float @v_neg_rsq_f32_foldable_user(float %val0, float %val1) {
; GCN-IEEE-NEXT: v_rsq_f32_e32 v0, v0
; GCN-IEEE-NEXT: v_mul_f32_e64 v0, -v0, v1
; GCN-IEEE-NEXT: s_setpc_b64 s[30:31]
- %sqrt = call contract float @llvm.sqrt.f32(float %val0)
- %div = fdiv contract float -1.0, %sqrt, !fpmath !0
+ %sqrt = call afn contract float @llvm.sqrt.f32(float %val0)
+ %div = fdiv afn contract float -1.0, %sqrt, !fpmath !0
%user = fmul contract float %div, %val1
ret float %user
}
@@ -643,8 +643,8 @@ define <2 x float> @v_neg_rsq_v2f32_foldable_user(<2 x float> %val0, <2 x float>
; CI-IEEE-SAFE-NEXT: v_mul_f32_e32 v0, v0, v2
; CI-IEEE-SAFE-NEXT: v_mul_f32_e32 v1, v1, v3
; CI-IEEE-SAFE-NEXT: s_setpc_b64 s[30:31]
- %sqrt = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> %val0)
- %div = fdiv contract <2 x float> <float -1.0, float -1.0>, %sqrt, !fpmath !0
+ %sqrt = call afn contract <2 x float> @llvm.sqrt.v2f32(<2 x float> %val0)
+ %div = fdiv afn contract <2 x float> <float -1.0, float -1.0>, %sqrt, !fpmath !0
%user = fmul contract <2 x float> %div, %val1
ret <2 x float> %user
}
@@ -672,8 +672,8 @@ define float @v_rsq_f32(float %val) {
; GCN-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v1, 0, 12, vcc
; GCN-IEEE-SAFE-NEXT: v_ldexp_f32_e32 v0, v0, v1
; GCN-IEEE-SAFE-NEXT: s_setpc_b64 s[30:31]
- %sqrt = call contract float @llvm.sqrt.f32(float %val), !fpmath !1
- %div = fdiv contract float 1.0, %sqrt, !fpmath !1
+ %sqrt = call afn contract float @llvm.sqrt.f32(float %val), !fpmath !1
+ %div = fdiv afn contract float 1.0, %sqrt, !fpmath !1
ret float %div
}
@@ -756,9 +756,9 @@ define { float, float } @v_rsq_f32_multi_use(float %val) {
; CI-IEEE-SAFE-NEXT: v_sub_i32_e32 v2, vcc, 0, v2
; CI-IEEE-SAFE-NEXT: v_ldexp_f32_e32 v1, v1, v2
; CI-IEEE-SAFE-NEXT: s_setpc_b64 s[30:31]
- %sqrt = call contract float @llvm.sqrt.f32(float %val), !fpmath !1
+ %sqrt = call afn contract float @llvm.sqrt.f32(float %val), !fpmath !1
%insert.0 = insertvalue { float, float } poison, float %sqrt, 0
- %div = fdiv arcp contract float 1.0, %sqrt, !fpmath !1
+ %div = fdiv arcp afn contract float 1.0, %sqrt, !fpmath !1
%insert.1 = insertvalue { float, float } %insert.0, float %div, 1
ret { float, float } %insert.1
}
@@ -838,8 +838,8 @@ define float @v_rsq_f32_missing_contract0(float %val) {
; CI-IEEE-SAFE-NEXT: v_sub_i32_e32 v0, vcc, 0, v0
; CI-IEEE-SAFE-NEXT: v_ldexp_f32_e32 v0, v1, v0
; CI-IEEE-SAFE-NEXT: s_setpc_b64 s[30:31]
- %sqrt = call float @llvm.sqrt.f32(float %val), !fpmath !1
- %div = fdiv arcp contract float 1.0, %sqrt, !fpmath !1
+ %sqrt = call afn float @llvm.sqrt.f32(float %val), !fpmath !1
+ %div = fdiv arcp afn contract float 1.0, %sqrt, !fpmath !1
ret float %div
}
@@ -855,8 +855,8 @@ define float @v_rsq_f32_missing_contract1(float %val) {
; GCN-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-IEEE-NEXT: v_rsq_f32_e32 v0, v0
; GCN-IEEE-NEXT: s_setpc_b64 s[30:31]
- %sqrt = call contract float @llvm.sqrt.f32(float %val), !fpmath !1
- %div = fdiv arcp float 1.0, %sqrt, !fpmath !1
+ %sqrt = call afn contract float @llvm.sqrt.f32(float %val), !fpmath !1
+ %div = fdiv arcp afn float 1.0, %sqrt, !fpmath !1
ret float %div
}
@@ -876,8 +876,8 @@ define float @v_rsq_f32_contractable_user(float %val0, float %val1) {
; GCN-IEEE-NEXT: v_rsq_f32_e32 v0, v0
; GCN-IEEE-NEXT: v_add_f32_e32 v0, v0, v1
; GCN-IEEE-NEXT: s_setpc_b64 s[30:31]
- %sqrt = call contract float @llvm.sqrt.f32(float %val0), !fpmath !1
- %div = fdiv contract float 1.0, %sqrt, !fpmath !1
+ %sqrt = call afn contract float @llvm.sqrt.f32(float %val0), !fpmath !1
+ %div = fdiv afn contract float 1.0, %sqrt, !fpmath !1
%add = fadd contract float %div, %val1
ret float %add
}
@@ -897,8 +897,8 @@ define float @v_rsq_f32_contractable_user_missing_contract0(float %val0, float %
; GCN-IEEE-NEXT: v_rsq_f32_e32 v0, v0
; GCN-IEEE-NEXT: v_add_f32_e32 v0, v0, v1
; GCN-IEEE-NEXT: s_setpc_b64 s[30:31]
- %sqrt = call contract float @llvm.sqrt.f32(float %val0), !fpmath !1
- %div = fdiv contract float 1.0, %sqrt, !fpmath !1
+ %sqrt = call afn contract float @llvm.sqrt.f32(float %val0), !fpmath !1
+ %div = fdiv afn contract float 1.0, %sqrt, !fpmath !1
%add = fadd contract float %div, %val1
ret float %add
}
@@ -918,8 +918,8 @@ define float @v_rsq_f32_contractable_user_missing_contract1(float %val0, float %
; GCN-IEEE-NEXT: v_rsq_f32_e32 v0, v0
; GCN-IEEE-NEXT: v_add_f32_e32 v0, v0, v1
; GCN-IEEE-NEXT: s_setpc_b64 s[30:31]
- %sqrt = call contract float @llvm.sqrt.f32(float %val0), !fpmath !1
- %div = fdiv contract float 1.0, %sqrt, !fpmath !1
+ %sqrt = call afn contract float @llvm.sqrt.f32(float %val0), !fpmath !1
+ %div = fdiv afn contract float 1.0, %sqrt, !fpmath !1
%add = fadd float %div, %val1
ret float %add
}
@@ -953,8 +953,8 @@ define float @v_rsq_f32_known_never_posdenormal(float nofpclass(psub) %val) {
; GCN-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-IEEE-NEXT: v_rsq_f32_e32 v0, v0
; GCN-IEEE-NEXT: s_setpc_b64 s[30:31]
- %sqrt = call contract float @llvm.sqrt.f32(float %val), !fpmath !1
- %div = fdiv contract float 1.0, %sqrt, !fpmath !1
+ %sqrt = call afn contract float @llvm.sqrt.f32(float %val), !fpmath !1
+ %div = fdiv afn contract float 1.0, %sqrt, !fpmath !1
ret float %div
}
>From f8bb469a294c7de4e989b0039c7f0f6f19851a98 Mon Sep 17 00:00:00 2001
From: PaperChalice <liujunchang97 at outlook.com>
Date: Tue, 29 Jul 2025 14:44:15 +0800
Subject: [PATCH 2/3] Remove `UnsafeFPMath` in `AMDGPUInstructions.td`
---
llvm/lib/Target/AMDGPU/AMDGPUInstructions.td | 1 -
1 file changed, 1 deletion(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index 7a50923ffedc6..511fc6967da31 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -94,7 +94,6 @@ def NoFP32Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode()
def NoFP64Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP64FP16Denormals == DenormalMode::getPreserveSign()">;
def IEEEModeEnabled : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().IEEE">;
def IEEEModeDisabled : Predicate<"!MF->getInfo<SIMachineFunctionInfo>()->getMode().IEEE">;
-def UnsafeFPMath : Predicate<"TM.Options.UnsafeFPMath">;
}
def FMA : Predicate<"Subtarget->hasFMA()">;
>From e91165ec6833e4f64fd53217a47beccb12c1bfc5 Mon Sep 17 00:00:00 2001
From: PaperChalice <liujunchang97 at outlook.com>
Date: Tue, 29 Jul 2025 16:15:16 +0800
Subject: [PATCH 3/3] Remove `UnsafeFPMath` in `*ISelLowering.cpp`
---
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 6 +++---
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 14 ++++++--------
2 files changed, 9 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 61189337e5233..31c4f62d24dfe 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -2634,7 +2634,7 @@ bool AMDGPUTargetLowering::allowApproxFunc(const SelectionDAG &DAG,
if (Flags.hasApproximateFuncs())
return true;
auto &Options = DAG.getTarget().Options;
- return Options.UnsafeFPMath || Options.ApproxFuncFPMath;
+ return Options.ApproxFuncFPMath;
}
bool AMDGPUTargetLowering::needsDenormHandlingF32(const SelectionDAG &DAG,
@@ -2757,7 +2757,7 @@ SDValue AMDGPUTargetLowering::LowerFLOGCommon(SDValue Op,
const auto &Options = getTargetMachine().Options;
if (VT == MVT::f16 || Flags.hasApproximateFuncs() ||
- Options.ApproxFuncFPMath || Options.UnsafeFPMath) {
+ Options.ApproxFuncFPMath) {
if (VT == MVT::f16 && !Subtarget->has16BitInsts()) {
// Log and multiply in f32 is good enough for f16.
@@ -3585,7 +3585,7 @@ SDValue AMDGPUTargetLowering::LowerFP_TO_FP16(SDValue Op, SelectionDAG &DAG) con
if (N0.getValueType() == MVT::f32)
return DAG.getNode(AMDGPUISD::FP_TO_FP16, DL, Op.getValueType(), N0);
- if (getTargetMachine().Options.UnsafeFPMath) {
+ if (Op->getFlags().hasApproximateFuncs()) {
// There is a generic expand for FP_TO_FP16 with unsafe fast math.
return SDValue();
}
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 9017f4f26f835..1ae4a927ad9fe 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -7148,7 +7148,7 @@ SDValue SITargetLowering::lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FpToFp16);
return DAG.getNode(ISD::BITCAST, DL, MVT::f16, Trunc);
}
- if (getTargetMachine().Options.UnsafeFPMath) {
+ if (Op->getFlags().hasApproximateFuncs()) {
SDValue Flags = Op.getOperand(1);
SDValue Src32 = DAG.getNode(ISD::FP_ROUND, DL, MVT::f32, Src, Flags);
return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Src32, Flags);
@@ -11243,8 +11243,7 @@ SDValue SITargetLowering::lowerFastUnsafeFDIV(SDValue Op,
EVT VT = Op.getValueType();
const SDNodeFlags Flags = Op->getFlags();
- bool AllowInaccurateRcp =
- Flags.hasApproximateFuncs() || DAG.getTarget().Options.UnsafeFPMath;
+ bool AllowInaccurateRcp = Flags.hasApproximateFuncs();
if (const ConstantFPSDNode *CLHS = dyn_cast<ConstantFPSDNode>(LHS)) {
// Without !fpmath accuracy information, we can't do more because we don't
@@ -11263,7 +11262,7 @@ SDValue SITargetLowering::lowerFastUnsafeFDIV(SDValue Op,
// 1.0 / sqrt(x) -> rsq(x)
- // XXX - Is UnsafeFPMath sufficient to do this for f64? The maximum ULP
+ // XXX - Is afn sufficient to do this for f64? The maximum ULP
// error seems really high at 2^29 ULP.
// 1.0 / x -> rcp(x)
return DAG.getNode(AMDGPUISD::RCP, SL, VT, RHS);
@@ -11297,8 +11296,7 @@ SDValue SITargetLowering::lowerFastUnsafeFDIV64(SDValue Op,
EVT VT = Op.getValueType();
const SDNodeFlags Flags = Op->getFlags();
- bool AllowInaccurateDiv =
- Flags.hasApproximateFuncs() || DAG.getTarget().Options.UnsafeFPMath;
+ bool AllowInaccurateDiv = Flags.hasApproximateFuncs();
if (!AllowInaccurateDiv)
return SDValue();
@@ -14550,7 +14548,7 @@ unsigned SITargetLowering::getFusedOpcode(const SelectionDAG &DAG,
return ISD::FMAD;
const TargetOptions &Options = DAG.getTarget().Options;
- if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath ||
+ if ((Options.AllowFPOpFusion == FPOpFusion::Fast ||
(N0->getFlags().hasAllowContract() &&
N1->getFlags().hasAllowContract())) &&
isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT)) {
@@ -15675,7 +15673,7 @@ SDValue SITargetLowering::performFMACombine(SDNode *N,
// regardless of the denorm mode setting. Therefore,
// unsafe-fp-math/fp-contract is sufficient to allow generating fdot2.
const TargetOptions &Options = DAG.getTarget().Options;
- if (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath ||
+ if (Options.AllowFPOpFusion == FPOpFusion::Fast ||
(N->getFlags().hasAllowContract() &&
FMA->getFlags().hasAllowContract())) {
Op1 = Op1.getOperand(0);
More information about the llvm-commits
mailing list