[llvm] [AMDGPU] Make fast-fmaf an optional flag, defaulting to True for GFX9 (PR #161450)

via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 30 14:37:00 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: Yi Qian (yiqian1)

<details>
<summary>Changes</summary>



---
Full diff: https://github.com/llvm/llvm-project/pull/161450.diff


3 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/AMDGPU.td (+1-1) 
- (modified) llvm/lib/Target/AMDGPU/GCNSubtarget.cpp (+4) 
- (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+4-2) 


``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index eaa1870f4be28..62b4e32dbcb59 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1524,7 +1524,7 @@ def FeatureGFX9 : GCNSubtargetFeatureGeneration<"GFX9",
    FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
    FeatureSMemRealTime, FeatureScalarStores, FeatureInv2PiInlineImm,
    FeatureApertureRegs, FeatureGFX9Insts, FeatureVOP3P, FeatureVGPRIndexMode,
-   FeatureFastFMAF32, FeatureDPP, FeatureIntClamp,
+   FeatureDPP, FeatureIntClamp,
    FeatureSDWA, FeatureSDWAOmod, FeatureSDWAScalar, FeatureSDWASdst,
    FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts,
    FeatureAddNoCarryInsts, FeatureGFX8Insts, FeatureGFX7GFX8GFX9Insts,
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp b/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp
index 7b94ea3ffbf1f..b7473e5ea4759 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp
@@ -85,6 +85,10 @@ GCNSubtarget &GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
       FullFS += "-wavefrontsize64,";
   }
 
+  // GFX9 enables fast-fmaf by default
+  if (GPU.contains_insensitive("gfx9") && !FS.contains_insensitive("fast-fmaf"))
+    FullFS += "+fast-fmaf";
+
   FullFS += FS;
 
   ParseSubtargetFeatures(GPU, /*TuneCPU*/ GPU, FullFS);
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 16530087444d2..910b693c8c6a6 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -6502,10 +6502,12 @@ bool SITargetLowering::enableAggressiveFMAFusion(EVT VT) const {
   // When fma is quarter rate, for f64 where add / sub are at best half rate,
   // most of these combines appear to be cycle neutral but save on instruction
   // count / code size.
-  return true;
+  return Subtarget->hasFastFMAF32();
 }
 
-bool SITargetLowering::enableAggressiveFMAFusion(LLT Ty) const { return true; }
+bool SITargetLowering::enableAggressiveFMAFusion(LLT Ty) const {
+  return Subtarget->hasFastFMAF32();
+}
 
 EVT SITargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &Ctx,
                                          EVT VT) const {

``````````

</details>


https://github.com/llvm/llvm-project/pull/161450


More information about the llvm-commits mailing list