[llvm] [AMDGPU] Make fast-fmaf an optional flag, defaulting to True for GFX9 (PR #161450)
Yi Qian via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 1 19:01:42 PDT 2025
https://github.com/yiqian1 updated https://github.com/llvm/llvm-project/pull/161450
From aab5e41bd8374daed3563503aed35acfb95fb5af Mon Sep 17 00:00:00 2001
From: Yi Qian <yi.qian at amd.com>
Date: Tue, 30 Sep 2025 20:34:15 +0000
Subject: [PATCH] [AMDGPU] Add a target option to disable aggressive FMA fusion
---
llvm/lib/Target/AMDGPU/AMDGPU.td | 7 +++++++
llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h | 5 +++++
llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp | 1 +
llvm/lib/Target/AMDGPU/GCNSubtarget.cpp | 4 ++++
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 8 ++++++--
5 files changed, 23 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index eaa1870f4be28..5a08e7d6db347 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1238,6 +1238,13 @@ def FeatureSetPrioIncWgInst : SubtargetFeature<"setprio-inc-wg-inst",
// Subtarget Features (options and debugging)
//===------------------------------------------------------------===//
+def FeatureDisableAggressiveFMAFusion : SubtargetFeature<
+ "disable-aggressive-fma-fusion",
+ "DisableAggressiveFMAFusion",
+ "true",
+ "Do not fold fmul and fadd/fsub into fma."
+>;
+
// Ugly hack to accomodate assembling modules with mixed
// wavesizes. Ideally we would have a mapping symbol in assembly which
// would keep track of which sections of code should be treated as
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index ed03ef21b6dda..0c380a7e4dc84 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -76,6 +76,7 @@ class AMDGPUSubtarget {
bool EnablePromoteAlloca = false;
bool HasTrigReducedRange = false;
bool FastFMAF32 = false;
+ bool DisableAggressiveFMAFusion = false;
unsigned EUsPerCU = 4;
unsigned MaxWavesPerEU = 10;
unsigned LocalMemorySize = 0;
@@ -303,6 +304,10 @@ class AMDGPUSubtarget {
return FastFMAF32;
}
+ bool hasDisableAggressiveFMAFusion() const {
+ return DisableAggressiveFMAFusion;
+ }
+
bool isPromoteAllocaEnabled() const {
return EnablePromoteAlloca;
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 03d16fdd54c42..554549063dbcc 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -288,6 +288,7 @@ const FeatureBitset GCNTTIImpl::InlineFeatureIgnoreList = {
AMDGPU::FeatureEnableUnsafeDSOffsetFolding, AMDGPU::FeatureFlatForGlobal,
AMDGPU::FeaturePromoteAlloca, AMDGPU::FeatureUnalignedScratchAccess,
AMDGPU::FeatureUnalignedAccessMode,
+ AMDGPU::FeatureDisableAggressiveFMAFusion,
AMDGPU::FeatureAutoWaitcntBeforeBarrier,
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp b/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp
index 7b94ea3ffbf1f..b7473e5ea4759 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp
@@ -85,6 +85,10 @@ GCNSubtarget &GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
FullFS += "-wavefrontsize64,";
}
+ // GFX9 enables fast-fmaf by default; the trailing comma keeps FS separate.
+ if (GPU.contains_insensitive("gfx9") && !FS.contains_insensitive("fast-fmaf"))
+ FullFS += "+fast-fmaf,";
+
FullFS += FS;
ParseSubtargetFeatures(GPU, /*TuneCPU*/ GPU, FullFS);
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 16530087444d2..59fcf9fb6da39 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -6502,10 +6502,14 @@ bool SITargetLowering::enableAggressiveFMAFusion(EVT VT) const {
// When fma is quarter rate, for f64 where add / sub are at best half rate,
// most of these combines appear to be cycle neutral but save on instruction
// count / code size.
- return true;
+ return Subtarget->hasFastFMAF32() &&
+ !Subtarget->hasDisableAggressiveFMAFusion();
}
-bool SITargetLowering::enableAggressiveFMAFusion(LLT Ty) const { return true; }
+bool SITargetLowering::enableAggressiveFMAFusion(LLT Ty) const {
+ return Subtarget->hasFastFMAF32() &&
+ !Subtarget->hasDisableAggressiveFMAFusion();
+}
EVT SITargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &Ctx,
EVT VT) const {
More information about the llvm-commits mailing list