[llvm] 07bafab - [AMDGPU] Do not generate V_FMAC_DX9_ZERO_F32 on GFX12 (#171116)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 8 05:20:06 PST 2025
Author: Jay Foad
Date: 2025-12-08T13:20:02Z
New Revision: 07bafab83de2ba9f7d15fb194e0d34cd71f17af2
URL: https://github.com/llvm/llvm-project/commit/07bafab83de2ba9f7d15fb194e0d34cd71f17af2
DIFF: https://github.com/llvm/llvm-project/commit/07bafab83de2ba9f7d15fb194e0d34cd71f17af2.diff
LOG: [AMDGPU] Do not generate V_FMAC_DX9_ZERO_F32 on GFX12 (#171116)
GFX12 does not have the FMAC form of this instruction, only the FMA
form.
Fixes: #170437
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPU.td
llvm/lib/Target/AMDGPU/SIInstructions.td
llvm/lib/Target/AMDGPU/VOP2Instructions.td
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fma.legacy.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index b201c85cffc9f..3b14a82bbbb04 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -2927,6 +2927,9 @@ def HasMadMacF32Insts : Predicate<"Subtarget->hasMadMacF32Insts()">,
def HasFmaLegacy32 : Predicate<"Subtarget->hasGFX10_3Insts()">,
AssemblerPredicate<(any_of FeatureGFX10_3Insts)>;
+def HasFmacLegacy32 : Predicate<"Subtarget->hasGFX10_3Insts() && Subtarget->getGeneration() < AMDGPUSubtarget::GFX12">,
+ AssemblerPredicate<(all_of FeatureGFX10_3Insts, (not FeatureGFX12Insts))>;
+
def HasAtomicDsPkAdd16Insts : Predicate<"Subtarget->hasAtomicDsPkAdd16Insts()">,
AssemblerPredicate<(any_of FeatureAtomicDsPkAdd16Insts)>;
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 1698fe209df04..984d1a4db4cd6 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -1451,7 +1451,7 @@ def : GCNPat <
// Don't allow source modifiers. If there are any source modifiers then it's
// better to select fma instead of fmac.
-let SubtargetPredicate = HasFmaLegacy32 in
+let SubtargetPredicate = HasFmacLegacy32 in
def : GCNPat <
(f32 (int_amdgcn_fma_legacy (VOP3NoMods f32:$src0),
(VOP3NoMods f32:$src1),
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index dbb7862ab4ab5..11ce1025a381e 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -1266,14 +1266,14 @@ let Constraints = "$vdst = $src2",
defm V_FMAC_F32 : VOP2Inst_VOPD <"v_fmac_f32", VOP_MAC_F32, 0x0, "v_fmac_f32">;
} // End SubtargetPredicate = HasDLInsts
-let SubtargetPredicate = HasFmaLegacy32 in {
+let SubtargetPredicate = HasFmacLegacy32 in {
let Constraints = "$vdst = $src2",
isConvertibleToThreeAddress = 1,
isCommutable = 1 in
defm V_FMAC_LEGACY_F32 : VOP2Inst <"v_fmac_legacy_f32", VOP_MAC_LEGACY_F32>;
-} // End SubtargetPredicate = HasFmaLegacy32
+} // End SubtargetPredicate = HasFmacLegacy32
let SubtargetPredicate = HasFmacF64Inst,
Constraints = "$vdst = $src2",
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fma.legacy.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fma.legacy.ll
index dab67cde215f4..0559159aa7451 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fma.legacy.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fma.legacy.ll
@@ -3,6 +3,8 @@
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1030 < %s | FileCheck -check-prefix=GFX10 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s
define float @v_fma(float %a, float %b, float %c) {
; GFX10-LABEL: v_fma:
@@ -16,6 +18,16 @@ define float @v_fma(float %a, float %b, float %c) {
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_fma_dx9_zero_f32 v0, v0, v1, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_fma:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_fma_dx9_zero_f32 v0, v0, v1, v2
+; GFX12-NEXT: s_setpc_b64 s[30:31]
%fma = call float @llvm.amdgcn.fma.legacy(float %a, float %b, float %c)
ret float %fma
}
@@ -32,6 +44,16 @@ define float @v_fmac(float %a, float %b, float %c) {
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_fmac_dx9_zero_f32_e32 v0, v1, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_fmac:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_fma_dx9_zero_f32 v0, v1, v2, v0
+; GFX12-NEXT: s_setpc_b64 s[30:31]
%fma = call float @llvm.amdgcn.fma.legacy(float %b, float %c, float %a)
ret float %fma
}
@@ -48,6 +70,16 @@ define float @v_fma_imm(float %a, float %c) {
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_fma_dx9_zero_f32 v0, 0x41200000, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_fma_imm:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_fma_dx9_zero_f32 v0, 0x41200000, v0, v1
+; GFX12-NEXT: s_setpc_b64 s[30:31]
%fma = call float @llvm.amdgcn.fma.legacy(float %a, float 10.0, float %c)
ret float %fma
}
@@ -64,6 +96,16 @@ define float @v_fmac_imm(float %a, float %c) {
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_fmac_dx9_zero_f32_e32 v0, 0x41200000, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_fmac_imm:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_fma_dx9_zero_f32 v0, 0x41200000, v1, v0
+; GFX12-NEXT: s_setpc_b64 s[30:31]
%fma = call float @llvm.amdgcn.fma.legacy(float 10.0, float %c, float %a)
ret float %fma
}
@@ -80,6 +122,16 @@ define float @v_fabs_fma(float %a, float %b, float %c) {
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_fma_dx9_zero_f32 v0, |v0|, v1, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_fabs_fma:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_fma_dx9_zero_f32 v0, |v0|, v1, v2
+; GFX12-NEXT: s_setpc_b64 s[30:31]
%fabs.a = call float @llvm.fabs.f32(float %a)
%fma = call float @llvm.amdgcn.fma.legacy(float %fabs.a, float %b, float %c)
ret float %fma
@@ -97,6 +149,16 @@ define float @v_fneg_fabs_fma(float %a, float %b, float %c) {
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_fma_dx9_zero_f32 v0, v0, -|v1|, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_fneg_fabs_fma:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_fma_dx9_zero_f32 v0, v0, -|v1|, v2
+; GFX12-NEXT: s_setpc_b64 s[30:31]
%fabs.b = call float @llvm.fabs.f32(float %b)
%neg.fabs.b = fneg float %fabs.b
%fma = call float @llvm.amdgcn.fma.legacy(float %a, float %neg.fabs.b, float %c)
@@ -115,6 +177,16 @@ define float @v_fneg_fma(float %a, float %b, float %c) {
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_fma_dx9_zero_f32 v0, v0, v1, -v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_fneg_fma:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_fma_dx9_zero_f32 v0, v0, v1, -v2
+; GFX12-NEXT: s_setpc_b64 s[30:31]
%neg.c = fneg float %c
%fma = call float @llvm.amdgcn.fma.legacy(float %a, float %b, float %neg.c)
ret float %fma
@@ -132,6 +204,16 @@ define float @v_fma_const_const(float %a) {
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_fma_dx9_zero_f32 v0, v0, 2.0, -1.0
; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_fma_const_const:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_fma_dx9_zero_f32 v0, v0, 2.0, -1.0
+; GFX12-NEXT: s_setpc_b64 s[30:31]
%fma = call float @llvm.amdgcn.fma.legacy(float %a, float 2.0, float -1.0)
ret float %fma
}
More information about the llvm-commits
mailing list