[llvm] dd12c34 - [AMDGPU] Shrink F16 MAD/FMA to MADAK/MADMK/FMAAK/FMAMK on GFX10
Jay Foad via llvm-commits
llvm-commits at lists.llvm.org
Wed May 18 02:02:41 PDT 2022
Author: Jay Foad
Date: 2022-05-18T10:00:06+01:00
New Revision: dd12c3433ee9b4ef15c633bd325ab5a0c9c5e03b
URL: https://github.com/llvm/llvm-project/commit/dd12c3433ee9b4ef15c633bd325ab5a0c9c5e03b
DIFF: https://github.com/llvm/llvm-project/commit/dd12c3433ee9b4ef15c633bd325ab5a0c9c5e03b.diff
LOG: [AMDGPU] Shrink F16 MAD/FMA to MADAK/MADMK/FMAAK/FMAMK on GFX10
Differential Revision: https://reviews.llvm.org/D125803
Added:
Modified:
llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
llvm/test/CodeGen/AMDGPU/gfx10-shrink-mad-fma.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index 9f00c78b256ef..d2a8cf7945a0c 100644
--- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -359,6 +359,12 @@ void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
case AMDGPU::V_FMA_F32_e64:
NewOpcode = AMDGPU::V_FMAAK_F32;
break;
+ case AMDGPU::V_MAD_F16_e64:
+ NewOpcode = AMDGPU::V_MADAK_F16;
+ break;
+ case AMDGPU::V_FMA_F16_e64:
+ NewOpcode = AMDGPU::V_FMAAK_F16;
+ break;
}
}
@@ -380,6 +386,12 @@ void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
case AMDGPU::V_FMA_F32_e64:
NewOpcode = AMDGPU::V_FMAMK_F32;
break;
+ case AMDGPU::V_MAD_F16_e64:
+ NewOpcode = AMDGPU::V_MADMK_F16;
+ break;
+ case AMDGPU::V_FMA_F16_e64:
+ NewOpcode = AMDGPU::V_FMAMK_F16;
+ break;
}
}
@@ -806,9 +818,10 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
if (!TII->isVOP3(MI))
continue;
- // TODO: Also shrink F16 forms.
if (MI.getOpcode() == AMDGPU::V_MAD_F32_e64 ||
- MI.getOpcode() == AMDGPU::V_FMA_F32_e64) {
+ MI.getOpcode() == AMDGPU::V_FMA_F32_e64 ||
+ MI.getOpcode() == AMDGPU::V_MAD_F16_e64 ||
+ MI.getOpcode() == AMDGPU::V_FMA_F16_e64) {
shrinkMadFma(MI);
continue;
}
diff --git a/llvm/test/CodeGen/AMDGPU/gfx10-shrink-mad-fma.mir b/llvm/test/CodeGen/AMDGPU/gfx10-shrink-mad-fma.mir
index 8150769ef40bb..198c5cb82a619 100644
--- a/llvm/test/CodeGen/AMDGPU/gfx10-shrink-mad-fma.mir
+++ b/llvm/test/CodeGen/AMDGPU/gfx10-shrink-mad-fma.mir
@@ -128,8 +128,8 @@ body: |
; GFX10-LABEL: name: mad_cvv_f16
; GFX10: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX10-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX10-NEXT: [[V_MAD_F16_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F16_e64 0, 18688, 0, [[DEF]], 0, [[DEF1]], 0, 0, implicit $mode, implicit $exec
- ; GFX10-NEXT: SI_RETURN implicit [[V_MAD_F16_e64_]]
+ ; GFX10-NEXT: [[V_MADMK_F16_:%[0-9]+]]:vgpr_32 = V_MADMK_F16 [[DEF]], 18688, [[DEF1]], implicit $mode, implicit $exec
+ ; GFX10-NEXT: SI_RETURN implicit [[V_MADMK_F16_]]
%0:vgpr_32 = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%2:vgpr_32 = V_MAD_F16_e64 0, 18688, 0, %0, 0, %1, 0, 0, implicit $mode, implicit $exec
@@ -143,8 +143,8 @@ body: |
; GFX10-LABEL: name: mad_vcv_f16
; GFX10: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX10-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX10-NEXT: [[V_MAD_F16_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F16_e64 0, [[DEF]], 0, 18688, 0, [[DEF1]], 0, 0, implicit $mode, implicit $exec
- ; GFX10-NEXT: SI_RETURN implicit [[V_MAD_F16_e64_]]
+ ; GFX10-NEXT: [[V_MADMK_F16_:%[0-9]+]]:vgpr_32 = V_MADMK_F16 [[DEF]], 18688, [[DEF1]], implicit $mode, implicit $exec
+ ; GFX10-NEXT: SI_RETURN implicit [[V_MADMK_F16_]]
%0:vgpr_32 = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%2:vgpr_32 = V_MAD_F16_e64 0, %0, 0, 18688, 0, %1, 0, 0, implicit $mode, implicit $exec
@@ -158,8 +158,8 @@ body: |
; GFX10-LABEL: name: mad_vvc_f16
; GFX10: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX10-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX10-NEXT: [[V_MAD_F16_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F16_e64 0, [[DEF]], 0, [[DEF1]], 0, 18688, 0, 0, implicit $mode, implicit $exec
- ; GFX10-NEXT: SI_RETURN implicit [[V_MAD_F16_e64_]]
+ ; GFX10-NEXT: [[V_MADAK_F16_:%[0-9]+]]:vgpr_32 = V_MADAK_F16 [[DEF]], [[DEF1]], 18688, implicit $mode, implicit $exec
+ ; GFX10-NEXT: SI_RETURN implicit [[V_MADAK_F16_]]
%0:vgpr_32 = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%2:vgpr_32 = V_MAD_F16_e64 0, %0, 0, %1, 0, 18688, 0, 0, implicit $mode, implicit $exec
@@ -173,8 +173,8 @@ body: |
; GFX10-LABEL: name: mad_vsc_f16
; GFX10: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
- ; GFX10-NEXT: [[V_MAD_F16_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F16_e64 0, [[DEF]], 0, [[DEF1]], 0, 18688, 0, 0, implicit $mode, implicit $exec
- ; GFX10-NEXT: SI_RETURN implicit [[V_MAD_F16_e64_]]
+ ; GFX10-NEXT: [[V_MADAK_F16_:%[0-9]+]]:vgpr_32 = V_MADAK_F16 [[DEF1]], [[DEF]], 18688, implicit $mode, implicit $exec
+ ; GFX10-NEXT: SI_RETURN implicit [[V_MADAK_F16_]]
%0:vgpr_32 = IMPLICIT_DEF
%1:sreg_32 = IMPLICIT_DEF
%2:vgpr_32 = V_MAD_F16_e64 0, %0, 0, %1, 0, 18688, 0, 0, implicit $mode, implicit $exec
@@ -188,8 +188,8 @@ body: |
; GFX10-LABEL: name: fma_cvv_f16
; GFX10: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX10-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX10-NEXT: [[V_FMA_F16_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_e64 0, 18688, 0, [[DEF]], 0, [[DEF1]], 0, 0, implicit $mode, implicit $exec
- ; GFX10-NEXT: SI_RETURN implicit [[V_FMA_F16_e64_]]
+ ; GFX10-NEXT: [[V_FMAMK_F16_:%[0-9]+]]:vgpr_32 = V_FMAMK_F16 [[DEF]], 18688, [[DEF1]], implicit $mode, implicit $exec
+ ; GFX10-NEXT: SI_RETURN implicit [[V_FMAMK_F16_]]
%0:vgpr_32 = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%2:vgpr_32 = V_FMA_F16_e64 0, 18688, 0, %0, 0, %1, 0, 0, implicit $mode, implicit $exec
@@ -203,8 +203,8 @@ body: |
; GFX10-LABEL: name: fma_vcv_f16
; GFX10: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX10-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX10-NEXT: [[V_FMA_F16_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_e64 0, [[DEF]], 0, 18688, 0, [[DEF1]], 0, 0, implicit $mode, implicit $exec
- ; GFX10-NEXT: SI_RETURN implicit [[V_FMA_F16_e64_]]
+ ; GFX10-NEXT: [[V_FMAMK_F16_:%[0-9]+]]:vgpr_32 = V_FMAMK_F16 [[DEF]], 18688, [[DEF1]], implicit $mode, implicit $exec
+ ; GFX10-NEXT: SI_RETURN implicit [[V_FMAMK_F16_]]
%0:vgpr_32 = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%2:vgpr_32 = V_FMA_F16_e64 0, %0, 0, 18688, 0, %1, 0, 0, implicit $mode, implicit $exec
@@ -218,8 +218,8 @@ body: |
; GFX10-LABEL: name: fma_vvc_f16
; GFX10: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX10-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX10-NEXT: [[V_FMA_F16_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_e64 0, [[DEF]], 0, [[DEF1]], 0, 18688, 0, 0, implicit $mode, implicit $exec
- ; GFX10-NEXT: SI_RETURN implicit [[V_FMA_F16_e64_]]
+ ; GFX10-NEXT: [[V_FMAAK_F16_:%[0-9]+]]:vgpr_32 = V_FMAAK_F16 [[DEF]], [[DEF1]], 18688, implicit $mode, implicit $exec
+ ; GFX10-NEXT: SI_RETURN implicit [[V_FMAAK_F16_]]
%0:vgpr_32 = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%2:vgpr_32 = V_FMA_F16_e64 0, %0, 0, %1, 0, 18688, 0, 0, implicit $mode, implicit $exec
@@ -233,8 +233,8 @@ body: |
; GFX10-LABEL: name: fma_vsc_f16
; GFX10: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
- ; GFX10-NEXT: [[V_FMA_F16_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_e64 0, [[DEF]], 0, [[DEF1]], 0, 18688, 0, 0, implicit $mode, implicit $exec
- ; GFX10-NEXT: SI_RETURN implicit [[V_FMA_F16_e64_]]
+ ; GFX10-NEXT: [[V_FMAAK_F16_:%[0-9]+]]:vgpr_32 = V_FMAAK_F16 [[DEF1]], [[DEF]], 18688, implicit $mode, implicit $exec
+ ; GFX10-NEXT: SI_RETURN implicit [[V_FMAAK_F16_]]
%0:vgpr_32 = IMPLICIT_DEF
%1:sreg_32 = IMPLICIT_DEF
%2:vgpr_32 = V_FMA_F16_e64 0, %0, 0, %1, 0, 18688, 0, 0, implicit $mode, implicit $exec
More information about the llvm-commits mailing list