[llvm] WIP: [AMDGPU] Remove `UnsafeFPMath` (PR #151079)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 28 21:45:59 PDT 2025
https://github.com/paperchalice created https://github.com/llvm/llvm-project/pull/151079
(No pull request description was provided.)
>From 6258dcb3ee3645743b5c0bd82165c7db5a23c639 Mon Sep 17 00:00:00 2001
From: PaperChalice <liujunchang97 at outlook.com>
Date: Tue, 29 Jul 2025 12:44:26 +0800
Subject: [PATCH] Remove `UnsafeFPMath` in `AMDGPUCodeGenPrepare`
---
llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp | 15 ++++-----------
1 file changed, 4 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index 5f1983791cfae..7e2c67d22cf6e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -89,10 +89,6 @@ static cl::opt<bool> DisableFDivExpand(
cl::ReallyHidden,
cl::init(false));
-static bool hasUnsafeFPMath(const Function &F) {
- return F.getFnAttribute("unsafe-fp-math").getValueAsBool();
-}
-
class AMDGPUCodeGenPrepareImpl
: public InstVisitor<AMDGPUCodeGenPrepareImpl, bool> {
public:
@@ -104,7 +100,6 @@ class AMDGPUCodeGenPrepareImpl
const DominatorTree *DT;
const UniformityInfo &UA;
const DataLayout &DL;
- const bool HasUnsafeFPMath;
const bool HasFP32DenormalFlush;
bool FlowChanged = false;
mutable Function *SqrtF32 = nullptr;
@@ -117,7 +112,6 @@ class AMDGPUCodeGenPrepareImpl
const DominatorTree *DT, const UniformityInfo &UA)
: F(F), ST(TM.getSubtarget<GCNSubtarget>(F)), TM(TM), TLI(TLI), AC(AC),
DT(DT), UA(UA), DL(F.getDataLayout()),
- HasUnsafeFPMath(hasUnsafeFPMath(F)),
HasFP32DenormalFlush(SIModeRegisterDefaults(F, ST).FP32Denormals ==
DenormalMode::getPreserveSign()) {}
@@ -637,8 +631,7 @@ bool AMDGPUCodeGenPrepareImpl::canOptimizeWithRsq(const FPMathOperator *SqrtOp,
return false;
// v_rsq_f32 gives 1ulp
- return SqrtFMF.approxFunc() || HasUnsafeFPMath ||
- SqrtOp->getFPAccuracy() >= 1.0f;
+ return SqrtFMF.approxFunc() || SqrtOp->getFPAccuracy() >= 1.0f;
}
Value *AMDGPUCodeGenPrepareImpl::optimizeWithRsq(
@@ -664,7 +657,7 @@ Value *AMDGPUCodeGenPrepareImpl::optimizeWithRsq(
IRBuilder<>::FastMathFlagGuard Guard(Builder);
Builder.setFastMathFlags(DivFMF | SqrtFMF);
- if ((DivFMF.approxFunc() && SqrtFMF.approxFunc()) || HasUnsafeFPMath ||
+ if ((DivFMF.approxFunc() && SqrtFMF.approxFunc()) ||
canIgnoreDenormalInput(Den, CtxI)) {
Value *Result = Builder.CreateUnaryIntrinsic(Intrinsic::amdgcn_rsq, Den);
// -1.0 / sqrt(x) -> fneg(rsq(x))
@@ -852,7 +845,7 @@ bool AMDGPUCodeGenPrepareImpl::visitFDiv(BinaryOperator &FDiv) {
// expansion of afn to codegen. The current interpretation is so aggressive we
// don't need any pre-consideration here when we have better information. A
// more conservative interpretation could use handling here.
- const bool AllowInaccurateRcp = HasUnsafeFPMath || DivFMF.approxFunc();
+ const bool AllowInaccurateRcp = DivFMF.approxFunc();
if (!RsqOp && AllowInaccurateRcp)
return false;
@@ -2026,7 +2019,7 @@ bool AMDGPUCodeGenPrepareImpl::visitSqrt(IntrinsicInst &Sqrt) {
// We're trying to handle the fast-but-not-that-fast case only. The lowering
// of fast llvm.sqrt will give the raw instruction anyway.
- if (SqrtFMF.approxFunc() || HasUnsafeFPMath)
+ if (SqrtFMF.approxFunc())
return false;
const float ReqdAccuracy = FPOp->getFPAccuracy();
More information about the llvm-commits mailing list