[llvm] [AMDGPU] Make fast-fmaf an optional flag, defaulting to True for GFX9 (PR #161450)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 30 14:37:00 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Yi Qian (yiqian1)
<details>
<summary>Changes</summary>
---
Full diff: https://github.com/llvm/llvm-project/pull/161450.diff
3 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/AMDGPU.td (+1-1)
- (modified) llvm/lib/Target/AMDGPU/GCNSubtarget.cpp (+4)
- (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+4-2)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index eaa1870f4be28..62b4e32dbcb59 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1524,7 +1524,7 @@ def FeatureGFX9 : GCNSubtargetFeatureGeneration<"GFX9",
FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
FeatureSMemRealTime, FeatureScalarStores, FeatureInv2PiInlineImm,
FeatureApertureRegs, FeatureGFX9Insts, FeatureVOP3P, FeatureVGPRIndexMode,
- FeatureFastFMAF32, FeatureDPP, FeatureIntClamp,
+ FeatureDPP, FeatureIntClamp,
FeatureSDWA, FeatureSDWAOmod, FeatureSDWAScalar, FeatureSDWASdst,
FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts,
FeatureAddNoCarryInsts, FeatureGFX8Insts, FeatureGFX7GFX8GFX9Insts,
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp b/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp
index 7b94ea3ffbf1f..b7473e5ea4759 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp
@@ -85,6 +85,10 @@ GCNSubtarget &GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
FullFS += "-wavefrontsize64,";
}
+ // GFX9 enables fast-fmaf by default
+ if (GPU.contains_insensitive("gfx9") && !FS.contains_insensitive("fast-fmaf"))
+ FullFS += "+fast-fmaf";
+
FullFS += FS;
ParseSubtargetFeatures(GPU, /*TuneCPU*/ GPU, FullFS);
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 16530087444d2..910b693c8c6a6 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -6502,10 +6502,12 @@ bool SITargetLowering::enableAggressiveFMAFusion(EVT VT) const {
// When fma is quarter rate, for f64 where add / sub are at best half rate,
// most of these combines appear to be cycle neutral but save on instruction
// count / code size.
- return true;
+ return Subtarget->hasFastFMAF32();
}
-bool SITargetLowering::enableAggressiveFMAFusion(LLT Ty) const { return true; }
+bool SITargetLowering::enableAggressiveFMAFusion(LLT Ty) const {
+ return Subtarget->hasFastFMAF32();
+}
EVT SITargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &Ctx,
EVT VT) const {
``````````
</details>
https://github.com/llvm/llvm-project/pull/161450
More information about the llvm-commits mailing list