[llvm] c24d68f - [AMDGPU] Take advantage of VOP3 literals in convertToThreeAddress
Jay Foad via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 2 09:29:42 PDT 2022
Author: Jay Foad
Date: 2022-08-02T17:27:11+01:00
New Revision: c24d68fff1fe8d3115c411d6e81092eb1f855b52
URL: https://github.com/llvm/llvm-project/commit/c24d68fff1fe8d3115c411d6e81092eb1f855b52
DIFF: https://github.com/llvm/llvm-project/commit/c24d68fff1fe8d3115c411d6e81092eb1f855b52.diff
LOG: [AMDGPU] Take advantage of VOP3 literals in convertToThreeAddress
This improves a corner case: on GFX10+, where VOP3 encodings accept a literal
operand, v_fmac can now be converted to v_fma even if it has a literal operand.
Differential Revision: https://reviews.llvm.org/D130992
Added:
Modified:
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fma.legacy.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 3eaac39cd7165..8eed1b01ff8e8 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3416,9 +3416,8 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
}
// VOP2 mac/fmac with a literal operand cannot be converted to VOP3 mad/fma
- // because VOP3 does not allow a literal operand.
- // TODO: Remove this restriction for GFX10.
- if (Src0Literal)
+ // if VOP3 does not allow a literal operand.
+ if (Src0Literal && !ST.hasVOP3Literal())
return nullptr;
unsigned NewOpc = IsFMA ? IsF16 ? AMDGPU::V_FMA_F16_gfx9_e64
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fma.legacy.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fma.legacy.ll
index 3dca12d771f1f..be3d6e70f81c7 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fma.legacy.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fma.legacy.ll
@@ -34,17 +34,14 @@ define float @v_fma_imm(float %a, float %c) {
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: v_fmac_legacy_f32_e32 v1, 0x41200000, v0
-; GFX10-NEXT: v_mov_b32_e32 v0, v1
+; GFX10-NEXT: v_fma_legacy_f32 v0, 0x41200000, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fma_imm:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX11-NEXT: v_fmac_dx9_zero_f32_e32 v1, 0x41200000, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_mov_b32_e32 v0, v1
+; GFX11-NEXT: v_fma_dx9_zero_f32 v0, 0x41200000, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%fma = call float @llvm.amdgcn.fma.legacy(float %a, float 10.0, float %c)
ret float %fma
More information about the llvm-commits mailing list