[llvm] dd12c34 - [AMDGPU] Shrink F16 MAD/FMA to MADAK/MADMK/FMAAK/FMAMK on GFX10

Jay Foad via llvm-commits llvm-commits at lists.llvm.org
Wed May 18 02:02:41 PDT 2022


Author: Jay Foad
Date: 2022-05-18T10:00:06+01:00
New Revision: dd12c3433ee9b4ef15c633bd325ab5a0c9c5e03b

URL: https://github.com/llvm/llvm-project/commit/dd12c3433ee9b4ef15c633bd325ab5a0c9c5e03b
DIFF: https://github.com/llvm/llvm-project/commit/dd12c3433ee9b4ef15c633bd325ab5a0c9c5e03b.diff

LOG: [AMDGPU] Shrink F16 MAD/FMA to MADAK/MADMK/FMAAK/FMAMK on GFX10

Differential Revision: https://reviews.llvm.org/D125803

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
    llvm/test/CodeGen/AMDGPU/gfx10-shrink-mad-fma.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index 9f00c78b256ef..d2a8cf7945a0c 100644
--- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -359,6 +359,12 @@ void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
     case AMDGPU::V_FMA_F32_e64:
       NewOpcode = AMDGPU::V_FMAAK_F32;
       break;
+    case AMDGPU::V_MAD_F16_e64:
+      NewOpcode = AMDGPU::V_MADAK_F16;
+      break;
+    case AMDGPU::V_FMA_F16_e64:
+      NewOpcode = AMDGPU::V_FMAAK_F16;
+      break;
     }
   }
 
@@ -380,6 +386,12 @@ void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
     case AMDGPU::V_FMA_F32_e64:
       NewOpcode = AMDGPU::V_FMAMK_F32;
       break;
+    case AMDGPU::V_MAD_F16_e64:
+      NewOpcode = AMDGPU::V_MADMK_F16;
+      break;
+    case AMDGPU::V_FMA_F16_e64:
+      NewOpcode = AMDGPU::V_FMAMK_F16;
+      break;
     }
   }
 
@@ -806,9 +818,10 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
       if (!TII->isVOP3(MI))
         continue;
 
-      // TODO: Also shrink F16 forms.
       if (MI.getOpcode() == AMDGPU::V_MAD_F32_e64 ||
-          MI.getOpcode() == AMDGPU::V_FMA_F32_e64) {
+          MI.getOpcode() == AMDGPU::V_FMA_F32_e64 ||
+          MI.getOpcode() == AMDGPU::V_MAD_F16_e64 ||
+          MI.getOpcode() == AMDGPU::V_FMA_F16_e64) {
         shrinkMadFma(MI);
         continue;
       }

diff  --git a/llvm/test/CodeGen/AMDGPU/gfx10-shrink-mad-fma.mir b/llvm/test/CodeGen/AMDGPU/gfx10-shrink-mad-fma.mir
index 8150769ef40bb..198c5cb82a619 100644
--- a/llvm/test/CodeGen/AMDGPU/gfx10-shrink-mad-fma.mir
+++ b/llvm/test/CodeGen/AMDGPU/gfx10-shrink-mad-fma.mir
@@ -128,8 +128,8 @@ body: |
     ; GFX10-LABEL: name: mad_cvv_f16
     ; GFX10: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
     ; GFX10-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
-    ; GFX10-NEXT: [[V_MAD_F16_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F16_e64 0, 18688, 0, [[DEF]], 0, [[DEF1]], 0, 0, implicit $mode, implicit $exec
-    ; GFX10-NEXT: SI_RETURN implicit [[V_MAD_F16_e64_]]
+    ; GFX10-NEXT: [[V_MADMK_F16_:%[0-9]+]]:vgpr_32 = V_MADMK_F16 [[DEF]], 18688, [[DEF1]], implicit $mode, implicit $exec
+    ; GFX10-NEXT: SI_RETURN implicit [[V_MADMK_F16_]]
     %0:vgpr_32 = IMPLICIT_DEF
     %1:vgpr_32 = IMPLICIT_DEF
     %2:vgpr_32 = V_MAD_F16_e64 0, 18688, 0, %0, 0, %1, 0, 0, implicit $mode, implicit $exec
@@ -143,8 +143,8 @@ body: |
     ; GFX10-LABEL: name: mad_vcv_f16
     ; GFX10: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
     ; GFX10-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
-    ; GFX10-NEXT: [[V_MAD_F16_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F16_e64 0, [[DEF]], 0, 18688, 0, [[DEF1]], 0, 0, implicit $mode, implicit $exec
-    ; GFX10-NEXT: SI_RETURN implicit [[V_MAD_F16_e64_]]
+    ; GFX10-NEXT: [[V_MADMK_F16_:%[0-9]+]]:vgpr_32 = V_MADMK_F16 [[DEF]], 18688, [[DEF1]], implicit $mode, implicit $exec
+    ; GFX10-NEXT: SI_RETURN implicit [[V_MADMK_F16_]]
     %0:vgpr_32 = IMPLICIT_DEF
     %1:vgpr_32 = IMPLICIT_DEF
     %2:vgpr_32 = V_MAD_F16_e64 0, %0, 0, 18688, 0, %1, 0, 0, implicit $mode, implicit $exec
@@ -158,8 +158,8 @@ body: |
     ; GFX10-LABEL: name: mad_vvc_f16
     ; GFX10: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
     ; GFX10-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
-    ; GFX10-NEXT: [[V_MAD_F16_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F16_e64 0, [[DEF]], 0, [[DEF1]], 0, 18688, 0, 0, implicit $mode, implicit $exec
-    ; GFX10-NEXT: SI_RETURN implicit [[V_MAD_F16_e64_]]
+    ; GFX10-NEXT: [[V_MADAK_F16_:%[0-9]+]]:vgpr_32 = V_MADAK_F16 [[DEF]], [[DEF1]], 18688, implicit $mode, implicit $exec
+    ; GFX10-NEXT: SI_RETURN implicit [[V_MADAK_F16_]]
     %0:vgpr_32 = IMPLICIT_DEF
     %1:vgpr_32 = IMPLICIT_DEF
     %2:vgpr_32 = V_MAD_F16_e64 0, %0, 0, %1, 0, 18688, 0, 0, implicit $mode, implicit $exec
@@ -173,8 +173,8 @@ body: |
     ; GFX10-LABEL: name: mad_vsc_f16
     ; GFX10: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
     ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
-    ; GFX10-NEXT: [[V_MAD_F16_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F16_e64 0, [[DEF]], 0, [[DEF1]], 0, 18688, 0, 0, implicit $mode, implicit $exec
-    ; GFX10-NEXT: SI_RETURN implicit [[V_MAD_F16_e64_]]
+    ; GFX10-NEXT: [[V_MADAK_F16_:%[0-9]+]]:vgpr_32 = V_MADAK_F16 [[DEF1]], [[DEF]], 18688, implicit $mode, implicit $exec
+    ; GFX10-NEXT: SI_RETURN implicit [[V_MADAK_F16_]]
     %0:vgpr_32 = IMPLICIT_DEF
     %1:sreg_32 = IMPLICIT_DEF
     %2:vgpr_32 = V_MAD_F16_e64 0, %0, 0, %1, 0, 18688, 0, 0, implicit $mode, implicit $exec
@@ -188,8 +188,8 @@ body: |
     ; GFX10-LABEL: name: fma_cvv_f16
     ; GFX10: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
     ; GFX10-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
-    ; GFX10-NEXT: [[V_FMA_F16_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_e64 0, 18688, 0, [[DEF]], 0, [[DEF1]], 0, 0, implicit $mode, implicit $exec
-    ; GFX10-NEXT: SI_RETURN implicit [[V_FMA_F16_e64_]]
+    ; GFX10-NEXT: [[V_FMAMK_F16_:%[0-9]+]]:vgpr_32 = V_FMAMK_F16 [[DEF]], 18688, [[DEF1]], implicit $mode, implicit $exec
+    ; GFX10-NEXT: SI_RETURN implicit [[V_FMAMK_F16_]]
     %0:vgpr_32 = IMPLICIT_DEF
     %1:vgpr_32 = IMPLICIT_DEF
     %2:vgpr_32 = V_FMA_F16_e64 0, 18688, 0, %0, 0, %1, 0, 0, implicit $mode, implicit $exec
@@ -203,8 +203,8 @@ body: |
     ; GFX10-LABEL: name: fma_vcv_f16
     ; GFX10: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
     ; GFX10-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
-    ; GFX10-NEXT: [[V_FMA_F16_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_e64 0, [[DEF]], 0, 18688, 0, [[DEF1]], 0, 0, implicit $mode, implicit $exec
-    ; GFX10-NEXT: SI_RETURN implicit [[V_FMA_F16_e64_]]
+    ; GFX10-NEXT: [[V_FMAMK_F16_:%[0-9]+]]:vgpr_32 = V_FMAMK_F16 [[DEF]], 18688, [[DEF1]], implicit $mode, implicit $exec
+    ; GFX10-NEXT: SI_RETURN implicit [[V_FMAMK_F16_]]
     %0:vgpr_32 = IMPLICIT_DEF
     %1:vgpr_32 = IMPLICIT_DEF
     %2:vgpr_32 = V_FMA_F16_e64 0, %0, 0, 18688, 0, %1, 0, 0, implicit $mode, implicit $exec
@@ -218,8 +218,8 @@ body: |
     ; GFX10-LABEL: name: fma_vvc_f16
     ; GFX10: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
     ; GFX10-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
-    ; GFX10-NEXT: [[V_FMA_F16_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_e64 0, [[DEF]], 0, [[DEF1]], 0, 18688, 0, 0, implicit $mode, implicit $exec
-    ; GFX10-NEXT: SI_RETURN implicit [[V_FMA_F16_e64_]]
+    ; GFX10-NEXT: [[V_FMAAK_F16_:%[0-9]+]]:vgpr_32 = V_FMAAK_F16 [[DEF]], [[DEF1]], 18688, implicit $mode, implicit $exec
+    ; GFX10-NEXT: SI_RETURN implicit [[V_FMAAK_F16_]]
     %0:vgpr_32 = IMPLICIT_DEF
     %1:vgpr_32 = IMPLICIT_DEF
     %2:vgpr_32 = V_FMA_F16_e64 0, %0, 0, %1, 0, 18688, 0, 0, implicit $mode, implicit $exec
@@ -233,8 +233,8 @@ body: |
     ; GFX10-LABEL: name: fma_vsc_f16
     ; GFX10: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
     ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
-    ; GFX10-NEXT: [[V_FMA_F16_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_e64 0, [[DEF]], 0, [[DEF1]], 0, 18688, 0, 0, implicit $mode, implicit $exec
-    ; GFX10-NEXT: SI_RETURN implicit [[V_FMA_F16_e64_]]
+    ; GFX10-NEXT: [[V_FMAAK_F16_:%[0-9]+]]:vgpr_32 = V_FMAAK_F16 [[DEF1]], [[DEF]], 18688, implicit $mode, implicit $exec
+    ; GFX10-NEXT: SI_RETURN implicit [[V_FMAAK_F16_]]
     %0:vgpr_32 = IMPLICIT_DEF
     %1:sreg_32 = IMPLICIT_DEF
     %2:vgpr_32 = V_FMA_F16_e64 0, %0, 0, %1, 0, 18688, 0, 0, implicit $mode, implicit $exec


        


More information about the llvm-commits mailing list