[llvm] afa0ed3 - [AMDGPU] Fix shrinking of F16 FMA on newer subtargets
Jay Foad via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 8 08:41:12 PDT 2022
Author: Jay Foad
Date: 2022-09-08T16:41:04+01:00
New Revision: afa0ed33df0717e73232f72ff8acee279f285140
URL: https://github.com/llvm/llvm-project/commit/afa0ed33df0717e73232f72ff8acee279f285140
DIFF: https://github.com/llvm/llvm-project/commit/afa0ed33df0717e73232f72ff8acee279f285140.diff
LOG: [AMDGPU] Fix shrinking of F16 FMA on newer subtargets
D125803 introduced shrinking of F16 FMA to FMAAK/FMAMK in
SIShrinkInstructions (useful on GFX10+, where VOP3 instructions may have
a literal operand), but failed to handle the V_FMA_F16_gfx9_e64 form of
the opcode, which is used on GFX9+.
Differential Revision: https://reviews.llvm.org/D133489
Added:
Modified:
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
llvm/test/CodeGen/AMDGPU/fma.f16.ll
llvm/test/CodeGen/AMDGPU/gfx10-shrink-mad-fma.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 73844ca8c088..d699166287f5 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2887,12 +2887,15 @@ bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) {
static constexpr unsigned ModifierOpNames[] = {
AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers,
AMDGPU::OpName::src2_modifiers, AMDGPU::OpName::clamp,
- AMDGPU::OpName::omod};
+ AMDGPU::OpName::omod, AMDGPU::OpName::op_sel};
void SIInstrInfo::removeModOperands(MachineInstr &MI) const {
unsigned Opc = MI.getOpcode();
- for (unsigned Name : reverse(ModifierOpNames))
- MI.removeOperand(AMDGPU::getNamedOperandIdx(Opc, Name));
+ for (unsigned Name : reverse(ModifierOpNames)) {
+ int Idx = AMDGPU::getNamedOperandIdx(Opc, Name);
+ if (Idx >= 0)
+ MI.removeOperand(Idx);
+ }
}
bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index 05d2dd000162..421799e4c98d 100644
--- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -382,6 +382,7 @@ void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
NewOpcode = AMDGPU::V_MADAK_F16;
break;
case AMDGPU::V_FMA_F16_e64:
+ case AMDGPU::V_FMA_F16_gfx9_e64:
NewOpcode = AMDGPU::V_FMAAK_F16;
break;
}
@@ -409,6 +410,7 @@ void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
NewOpcode = AMDGPU::V_MADMK_F16;
break;
case AMDGPU::V_FMA_F16_e64:
+ case AMDGPU::V_FMA_F16_gfx9_e64:
NewOpcode = AMDGPU::V_FMAMK_F16;
break;
}
@@ -852,7 +854,8 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
if (MI.getOpcode() == AMDGPU::V_MAD_F32_e64 ||
MI.getOpcode() == AMDGPU::V_FMA_F32_e64 ||
MI.getOpcode() == AMDGPU::V_MAD_F16_e64 ||
- MI.getOpcode() == AMDGPU::V_FMA_F16_e64) {
+ MI.getOpcode() == AMDGPU::V_FMA_F16_e64 ||
+ MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_e64) {
shrinkMadFma(MI);
continue;
}
diff --git a/llvm/test/CodeGen/AMDGPU/fma.f16.ll b/llvm/test/CodeGen/AMDGPU/fma.f16.ll
index 20d39efbae1b..d4eaf6605a68 100644
--- a/llvm/test/CodeGen/AMDGPU/fma.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fma.f16.ll
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx900 < %s | FileCheck %s -check-prefixes=GFX9,GFX9-SDAG
; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx900 < %s | FileCheck %s -check-prefixes=GFX9,GFX9-GISEL
-; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1010 < %s | FileCheck %s -check-prefixes=GFX10,GFX10-SDAG
-; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1010 < %s | FileCheck %s -check-prefixes=GFX10,GFX10-GISEL
+; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1010 < %s | FileCheck %s -check-prefixes=GFX10
+; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1010 < %s | FileCheck %s -check-prefixes=GFX10
declare half @llvm.fma.f16(half, half, half)
declare half @llvm.maxnum.f16(half, half)
@@ -58,19 +58,12 @@ define half @test_fmaak(half %x, half %y, half %z) {
; GFX9-GISEL-NEXT: v_fma_f16 v0, v0, v1, v2
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-SDAG-LABEL: test_fmaak:
-; GFX10-SDAG: ; %bb.0:
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-SDAG-NEXT: v_fmaak_f16 v0, v0, v1, 0x4200
-; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-GISEL-LABEL: test_fmaak:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-GISEL-NEXT: v_fma_f16 v0, v0, v1, 0x4200
-; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX10-LABEL: test_fmaak:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: v_fmaak_f16 v0, v0, v1, 0x4200
+; GFX10-NEXT: s_setpc_b64 s[30:31]
%r = call half @llvm.fma.f16(half %x, half %y, half 0xH4200)
ret half %r
}
diff --git a/llvm/test/CodeGen/AMDGPU/gfx10-shrink-mad-fma.mir b/llvm/test/CodeGen/AMDGPU/gfx10-shrink-mad-fma.mir
index 198c5cb82a61..9ace3f1ecaa0 100644
--- a/llvm/test/CodeGen/AMDGPU/gfx10-shrink-mad-fma.mir
+++ b/llvm/test/CodeGen/AMDGPU/gfx10-shrink-mad-fma.mir
@@ -192,7 +192,7 @@ body: |
; GFX10-NEXT: SI_RETURN implicit [[V_FMAMK_F16_]]
%0:vgpr_32 = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
- %2:vgpr_32 = V_FMA_F16_e64 0, 18688, 0, %0, 0, %1, 0, 0, implicit $mode, implicit $exec
+ %2:vgpr_32 = V_FMA_F16_gfx9_e64 0, 18688, 0, %0, 0, %1, 0, 0, implicit $mode, implicit $exec
SI_RETURN implicit %2
...
@@ -207,7 +207,7 @@ body: |
; GFX10-NEXT: SI_RETURN implicit [[V_FMAMK_F16_]]
%0:vgpr_32 = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
- %2:vgpr_32 = V_FMA_F16_e64 0, %0, 0, 18688, 0, %1, 0, 0, implicit $mode, implicit $exec
+ %2:vgpr_32 = V_FMA_F16_gfx9_e64 0, %0, 0, 18688, 0, %1, 0, 0, implicit $mode, implicit $exec
SI_RETURN implicit %2
...
@@ -222,7 +222,7 @@ body: |
; GFX10-NEXT: SI_RETURN implicit [[V_FMAAK_F16_]]
%0:vgpr_32 = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
- %2:vgpr_32 = V_FMA_F16_e64 0, %0, 0, %1, 0, 18688, 0, 0, implicit $mode, implicit $exec
+ %2:vgpr_32 = V_FMA_F16_gfx9_e64 0, %0, 0, %1, 0, 18688, 0, 0, implicit $mode, implicit $exec
SI_RETURN implicit %2
...
@@ -237,6 +237,6 @@ body: |
; GFX10-NEXT: SI_RETURN implicit [[V_FMAAK_F16_]]
%0:vgpr_32 = IMPLICIT_DEF
%1:sreg_32 = IMPLICIT_DEF
- %2:vgpr_32 = V_FMA_F16_e64 0, %0, 0, %1, 0, 18688, 0, 0, implicit $mode, implicit $exec
+ %2:vgpr_32 = V_FMA_F16_gfx9_e64 0, %0, 0, %1, 0, 18688, 0, 0, implicit $mode, implicit $exec
SI_RETURN implicit %2
...
More information about the llvm-commits mailing list