[llvm] cbba8f0 - [AMDGPU] Codegen support for v_fmaak_f64/v_fmamk_f64 (#148734)

via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 14 17:57:10 PDT 2025


Author: Stanislav Mekhanoshin
Date: 2025-07-14T17:57:06-07:00
New Revision: cbba8f0acb537be00f649a640e98f8c92692fe9b

URL: https://github.com/llvm/llvm-project/commit/cbba8f0acb537be00f649a640e98f8c92692fe9b
DIFF: https://github.com/llvm/llvm-project/commit/cbba8f0acb537be00f649a640e98f8c92692fe9b.diff

LOG: [AMDGPU] Codegen support for v_fmaak_f64/v_fmamk_f64 (#148734)

Added: 
    llvm/test/CodeGen/AMDGPU/shrink-fma-f64.mir

Modified: 
    llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
    llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
    llvm/test/CodeGen/AMDGPU/literal64.ll
    llvm/test/CodeGen/AMDGPU/peephole-fold-imm.mir
    llvm/test/CodeGen/AMDGPU/twoaddr-fma-f64.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 9b9291e8ef199..56fbb79c0d805 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3513,6 +3513,10 @@ static unsigned getNewFMAAKInst(const GCNSubtarget &ST, unsigned Opc) {
                                         ? AMDGPU::V_FMAAK_F16_t16
                                         : AMDGPU::V_FMAAK_F16_fake16
                                   : AMDGPU::V_FMAAK_F16;
+  case AMDGPU::V_FMAC_F64_e32:
+  case AMDGPU::V_FMAC_F64_e64:
+  case AMDGPU::V_FMA_F64_e64:
+    return AMDGPU::V_FMAAK_F64;
   default:
     llvm_unreachable("invalid instruction");
   }
@@ -3541,6 +3545,10 @@ static unsigned getNewFMAMKInst(const GCNSubtarget &ST, unsigned Opc) {
                                         ? AMDGPU::V_FMAMK_F16_t16
                                         : AMDGPU::V_FMAMK_F16_fake16
                                   : AMDGPU::V_FMAMK_F16;
+  case AMDGPU::V_FMAC_F64_e32:
+  case AMDGPU::V_FMAC_F64_e64:
+  case AMDGPU::V_FMA_F64_e64:
+    return AMDGPU::V_FMAMK_F64;
   default:
     llvm_unreachable("invalid instruction");
   }
@@ -3619,7 +3627,8 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
       Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
       Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 ||
       Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
-      Opc == AMDGPU::V_FMAC_F16_fake16_e64) {
+      Opc == AMDGPU::V_FMAC_F16_fake16_e64 || Opc == AMDGPU::V_FMA_F64_e64 ||
+      Opc == AMDGPU::V_FMAC_F64_e64) {
     // Don't fold if we are using source or output modifiers. The new VOP2
     // instructions don't have them.
     if (hasAnyModifiersSet(UseMI))
@@ -3691,7 +3700,8 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
 
       if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
           Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
-          Opc == AMDGPU::V_FMAC_F16_fake16_e64 || Opc == AMDGPU::V_FMAC_F16_e64)
+          Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
+          Opc == AMDGPU::V_FMAC_F16_e64 || Opc == AMDGPU::V_FMAC_F64_e64)
         UseMI.untieRegOperand(
             AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
 
@@ -3759,7 +3769,8 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
 
       if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
           Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
-          Opc == AMDGPU::V_FMAC_F16_fake16_e64 || Opc == AMDGPU::V_FMAC_F16_e64)
+          Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
+          Opc == AMDGPU::V_FMAC_F16_e64 || Opc == AMDGPU::V_FMAC_F64_e64)
         UseMI.untieRegOperand(
             AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
 
@@ -4080,8 +4091,8 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
   const MachineOperand *Omod = getNamedOperand(MI, AMDGPU::OpName::omod);
   const MachineOperand *OpSel = getNamedOperand(MI, AMDGPU::OpName::op_sel);
 
-  if (!Src0Mods && !Src1Mods && !Src2Mods && !Clamp && !Omod && !IsF64 &&
-      !IsLegacy &&
+  if (!Src0Mods && !Src1Mods && !Src2Mods && !Clamp && !Omod && !IsLegacy &&
+      (!IsF64 || ST.hasFmaakFmamkF64Insts()) &&
       // If we have an SGPR input, we will violate the constant bus restriction.
       (ST.getConstantBusLimit(Opc) > 1 || !Src0->isReg() ||
        !RI.isSGPRReg(MBB.getParent()->getRegInfo(), Src0->getReg()))) {

diff  --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index 4a4b865dc5d1d..7a519117f2482 100644
--- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -463,6 +463,10 @@ void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
     case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
       NewOpcode = AMDGPU::V_FMAAK_F16_fake16;
       break;
+    case AMDGPU::V_FMA_F64_e64:
+      if (ST->hasFmaakFmamkF64Insts())
+        NewOpcode = AMDGPU::V_FMAAK_F64;
+      break;
     }
   }
 
@@ -497,6 +501,10 @@ void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
     case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
       NewOpcode = AMDGPU::V_FMAMK_F16_fake16;
       break;
+    case AMDGPU::V_FMA_F64_e64:
+      if (ST->hasFmaakFmamkF64Insts())
+        NewOpcode = AMDGPU::V_FMAMK_F64;
+      break;
     }
   }
 
@@ -961,7 +969,9 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
           MI.getOpcode() == AMDGPU::V_FMA_F16_e64 ||
           MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_e64 ||
           MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_t16_e64 ||
-          MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_fake16_e64) {
+          MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_fake16_e64 ||
+          (MI.getOpcode() == AMDGPU::V_FMA_F64_e64 &&
+           ST->hasFmaakFmamkF64Insts())) {
         shrinkMadFma(MI);
         continue;
       }

diff  --git a/llvm/test/CodeGen/AMDGPU/literal64.ll b/llvm/test/CodeGen/AMDGPU/literal64.ll
index bb281bd6b6c12..df4ff2c8d9851 100644
--- a/llvm/test/CodeGen/AMDGPU/literal64.ll
+++ b/llvm/test/CodeGen/AMDGPU/literal64.ll
@@ -256,17 +256,28 @@ define amdgpu_ps <2 x float> @v_lshl_add_u64(i64 %a) {
 ; No folding into VOP2 promoted to VOP3
 
 define amdgpu_ps <2 x float> @v_fma_f64(double %a, double %b) {
-; GCN-LABEL: v_fma_f64:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    v_mov_b64_e32 v[4:5], lit64(0x4063233333333333)
-; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GCN-NEXT:    v_fmac_f64_e32 v[4:5], v[0:1], v[2:3]
-; GCN-NEXT:    v_mov_b64_e32 v[2:3], lit64(0x4069033333333333)
-; GCN-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], v[2:3]
-; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GCN-NEXT:    v_fmac_f64_e32 v[2:3], v[0:1], v[4:5]
-; GCN-NEXT:    v_dual_mov_b32 v0, v2 :: v_dual_mov_b32 v1, v3
-; GCN-NEXT:    ; return to shader part epilog
+; GCN-SDAG-LABEL: v_fma_f64:
+; GCN-SDAG:       ; %bb.0:
+; GCN-SDAG-NEXT:    v_fmaak_f64 v[4:5], v[0:1], v[2:3], lit64(0x4063233333333333)
+; GCN-SDAG-NEXT:    v_mov_b64_e32 v[2:3], lit64(0x4069033333333333)
+; GCN-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GCN-SDAG-NEXT:    v_fmaak_f64 v[0:1], v[0:1], v[4:5], lit64(0x4069033333333333)
+; GCN-SDAG-NEXT:    v_fmac_f64_e32 v[2:3], v[0:1], v[4:5]
+; GCN-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GCN-SDAG-NEXT:    v_dual_mov_b32 v0, v2 :: v_dual_mov_b32 v1, v3
+; GCN-SDAG-NEXT:    ; return to shader part epilog
+;
+; GCN-GISEL-LABEL: v_fma_f64:
+; GCN-GISEL:       ; %bb.0:
+; GCN-GISEL-NEXT:    v_mov_b64_e32 v[4:5], lit64(0x4063233333333333)
+; GCN-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GCN-GISEL-NEXT:    v_fmac_f64_e32 v[4:5], v[0:1], v[2:3]
+; GCN-GISEL-NEXT:    v_mov_b64_e32 v[2:3], lit64(0x4069033333333333)
+; GCN-GISEL-NEXT:    v_fmaak_f64 v[0:1], v[0:1], v[4:5], lit64(0x4069033333333333)
+; GCN-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GCN-GISEL-NEXT:    v_fmac_f64_e32 v[2:3], v[0:1], v[4:5]
+; GCN-GISEL-NEXT:    v_dual_mov_b32 v0, v2 :: v_dual_mov_b32 v1, v3
+; GCN-GISEL-NEXT:    ; return to shader part epilog
   %r1 = call double @llvm.fma.f64(double %a, double %b, double 153.1) nounwind readnone
   %r2 = call double @llvm.fma.f64(double %a, double %r1, double 200.1) nounwind readnone
   %r3 = call double @llvm.fma.f64(double %r2, double %r1, double 200.1) nounwind readnone

diff  --git a/llvm/test/CodeGen/AMDGPU/peephole-fold-imm.mir b/llvm/test/CodeGen/AMDGPU/peephole-fold-imm.mir
index afbbb05bfc3a5..764a1e1090181 100644
--- a/llvm/test/CodeGen/AMDGPU/peephole-fold-imm.mir
+++ b/llvm/test/CodeGen/AMDGPU/peephole-fold-imm.mir
@@ -1,5 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx942 -verify-machineinstrs -run-pass peephole-opt -o - %s | FileCheck -check-prefix=GCN %s
+# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx942 -verify-machineinstrs -run-pass peephole-opt -o - %s | FileCheck -check-prefixes=GCN,GFX942 %s
+# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx1250 -run-pass peephole-opt -o - %s | FileCheck -check-prefixes=GCN,GFX1250 %s
 
 ---
 name:            fold_simm_virtual
@@ -564,6 +565,144 @@ body:             |
 
 ...
 
+---
+name:            fmac_sreg_64_src0_to_fmamk_f64
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; GFX942-LABEL: name: fmac_sreg_64_src0_to_fmamk_f64
+    ; GFX942: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+    ; GFX942-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+    ; GFX942-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 1311768467750121200
+    ; GFX942-NEXT: [[V_FMAC_F64_e64_:%[0-9]+]]:vreg_64_align2 = V_FMAC_F64_e64 0, [[S_MOV_B]], 0, [[DEF]], 0, [[DEF1]], 0, 0, implicit $mode, implicit $exec
+    ; GFX942-NEXT: SI_RETURN_TO_EPILOG [[V_FMAC_F64_e64_]]
+    ;
+    ; GFX1250-LABEL: name: fmac_sreg_64_src0_to_fmamk_f64
+    ; GFX1250: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+    ; GFX1250-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+    ; GFX1250-NEXT: [[V_FMAMK_F64_:%[0-9]+]]:vreg_64_align2 = V_FMAMK_F64 [[DEF]], 1311768467750121200, [[DEF1]], implicit $mode, implicit $exec
+    ; GFX1250-NEXT: SI_RETURN_TO_EPILOG [[V_FMAMK_F64_]]
+    %0:vreg_64_align2 = IMPLICIT_DEF
+    %1:vreg_64_align2 = IMPLICIT_DEF
+    %2:sreg_64 = S_MOV_B64_IMM_PSEUDO 1311768467750121200
+    %3:vreg_64_align2 = V_FMAC_F64_e64 0, %2, 0, %0, 0, %1, 0, 0, implicit $mode, implicit $exec
+    SI_RETURN_TO_EPILOG %3
+...
+
+---
+name:            fmac_sreg_64_src1_to_fmamk_f64
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; GCN-LABEL: name: fmac_sreg_64_src1_to_fmamk_f64
+    ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+    ; GCN-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 1311768467750121200
+    ; GCN-NEXT: [[V_FMAC_F64_e64_:%[0-9]+]]:vreg_64_align2 = V_FMAC_F64_e64 0, [[DEF]], 0, [[DEF1]], 0, [[DEF1]], 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: SI_RETURN_TO_EPILOG [[V_FMAC_F64_e64_]]
+    %0:vreg_64_align2 = IMPLICIT_DEF
+    %1:vreg_64_align2 = IMPLICIT_DEF
+    %2:sreg_64 = S_MOV_B64_IMM_PSEUDO 1311768467750121200
+    %3:vreg_64_align2 = V_FMAC_F64_e64 0, %0, 0, %1, 0, %1, 0, 0, implicit $mode, implicit $exec
+    SI_RETURN_TO_EPILOG %3
+...
+
+---
+name:            fmac_vreg_64_to_fmaak_f64
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; GFX942-LABEL: name: fmac_vreg_64_to_fmaak_f64
+    ; GFX942: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+    ; GFX942-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+    ; GFX942-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 1311768467750121200, implicit $exec
+    ; GFX942-NEXT: [[V_FMAC_F64_e64_:%[0-9]+]]:vreg_64_align2 = V_FMAC_F64_e64 0, [[DEF]], 0, [[DEF1]], 0, [[V_MOV_B]], 0, 0, implicit $mode, implicit $exec
+    ; GFX942-NEXT: SI_RETURN_TO_EPILOG [[V_FMAC_F64_e64_]]
+    ;
+    ; GFX1250-LABEL: name: fmac_vreg_64_to_fmaak_f64
+    ; GFX1250: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+    ; GFX1250-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+    ; GFX1250-NEXT: [[V_FMAAK_F64_:%[0-9]+]]:vreg_64_align2 = V_FMAAK_F64 [[DEF]], [[DEF1]], 1311768467750121200, implicit $mode, implicit $exec
+    ; GFX1250-NEXT: SI_RETURN_TO_EPILOG [[V_FMAAK_F64_]]
+    %0:vreg_64_align2 = IMPLICIT_DEF
+    %1:vreg_64_align2 = IMPLICIT_DEF
+    %2:vreg_64_align2 = V_MOV_B64_PSEUDO 1311768467750121200, implicit $exec
+    %3:vreg_64_align2 = V_FMAC_F64_e64 0, %0, 0, %1, 0, %2, 0, 0, implicit $mode, implicit $exec
+    SI_RETURN_TO_EPILOG %3
+...
+
+---
+name:            fma_sreg_64_src0_to_fmamk_f64
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; GFX942-LABEL: name: fma_sreg_64_src0_to_fmamk_f64
+    ; GFX942: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+    ; GFX942-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+    ; GFX942-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 1311768467750121200
+    ; GFX942-NEXT: [[V_FMA_F64_e64_:%[0-9]+]]:vreg_64_align2 = V_FMA_F64_e64 0, [[S_MOV_B]], 0, [[DEF]], 0, [[DEF1]], 0, 0, implicit $mode, implicit $exec
+    ; GFX942-NEXT: SI_RETURN_TO_EPILOG [[V_FMA_F64_e64_]]
+    ;
+    ; GFX1250-LABEL: name: fma_sreg_64_src0_to_fmamk_f64
+    ; GFX1250: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+    ; GFX1250-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+    ; GFX1250-NEXT: [[V_FMAMK_F64_:%[0-9]+]]:vreg_64_align2 = V_FMAMK_F64 [[DEF]], 1311768467750121200, [[DEF1]], implicit $mode, implicit $exec
+    ; GFX1250-NEXT: SI_RETURN_TO_EPILOG [[V_FMAMK_F64_]]
+    %0:vreg_64_align2 = IMPLICIT_DEF
+    %1:vreg_64_align2 = IMPLICIT_DEF
+    %2:sreg_64 = S_MOV_B64_IMM_PSEUDO 1311768467750121200
+    %3:vreg_64_align2 = V_FMA_F64_e64 0, %2, 0, %0, 0, %1, 0, 0, implicit $mode, implicit $exec
+    SI_RETURN_TO_EPILOG %3
+...
+
+---
+name:            fma_sreg_64_src1_to_fmamk_f64
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; GCN-LABEL: name: fma_sreg_64_src1_to_fmamk_f64
+    ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+    ; GCN-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 1311768467750121200
+    ; GCN-NEXT: [[V_FMA_F64_e64_:%[0-9]+]]:vreg_64_align2 = V_FMA_F64_e64 0, [[DEF]], 0, [[DEF1]], 0, [[DEF1]], 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: SI_RETURN_TO_EPILOG [[V_FMA_F64_e64_]]
+    %0:vreg_64_align2 = IMPLICIT_DEF
+    %1:vreg_64_align2 = IMPLICIT_DEF
+    %2:sreg_64 = S_MOV_B64_IMM_PSEUDO 1311768467750121200
+    %3:vreg_64_align2 = V_FMA_F64_e64 0, %0, 0, %1, 0, %1, 0, 0, implicit $mode, implicit $exec
+    SI_RETURN_TO_EPILOG %3
+...
+
+---
+name:            fma_vreg_64_to_fmaak_f64
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; GFX942-LABEL: name: fma_vreg_64_to_fmaak_f64
+    ; GFX942: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+    ; GFX942-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+    ; GFX942-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 1311768467750121200, implicit $exec
+    ; GFX942-NEXT: [[V_FMA_F64_e64_:%[0-9]+]]:vreg_64_align2 = V_FMA_F64_e64 0, [[DEF]], 0, [[DEF1]], 0, [[V_MOV_B]], 0, 0, implicit $mode, implicit $exec
+    ; GFX942-NEXT: SI_RETURN_TO_EPILOG [[V_FMA_F64_e64_]]
+    ;
+    ; GFX1250-LABEL: name: fma_vreg_64_to_fmaak_f64
+    ; GFX1250: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+    ; GFX1250-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+    ; GFX1250-NEXT: [[V_FMAAK_F64_:%[0-9]+]]:vreg_64_align2 = V_FMAAK_F64 [[DEF]], [[DEF1]], 1311768467750121200, implicit $mode, implicit $exec
+    ; GFX1250-NEXT: SI_RETURN_TO_EPILOG [[V_FMAAK_F64_]]
+    %0:vreg_64_align2 = IMPLICIT_DEF
+    %1:vreg_64_align2 = IMPLICIT_DEF
+    %2:vreg_64_align2 = V_MOV_B64_PSEUDO 1311768467750121200, implicit $exec
+    %3:vreg_64_align2 = V_FMA_F64_e64 0, %0, 0, %1, 0, %2, 0, 0, implicit $mode, implicit $exec
+    SI_RETURN_TO_EPILOG %3
+...
+
 ---
 name:            fold_v_mov_b32_e32_literal_to_agpr
 body:             |

diff  --git a/llvm/test/CodeGen/AMDGPU/shrink-fma-f64.mir b/llvm/test/CodeGen/AMDGPU/shrink-fma-f64.mir
new file mode 100644
index 0000000000000..be46831d3bfe5
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/shrink-fma-f64.mir
@@ -0,0 +1,62 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -run-pass si-shrink-instructions %s -o - | FileCheck %s -check-prefix=GFX1250
+
+---
+name: fma_cvv_f64
+body: |
+  bb.0:
+    ; GFX1250-LABEL: name: fma_cvv_f64
+    ; GFX1250: $vgpr0_vgpr1 = IMPLICIT_DEF
+    ; GFX1250-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF
+    ; GFX1250-NEXT: $vgpr4_vgpr5 = V_FMAMK_F64 $vgpr0_vgpr1, 4638355772470722560, $vgpr2_vgpr3, implicit $mode, implicit $exec
+    ; GFX1250-NEXT: SI_RETURN implicit $vgpr4_vgpr5
+    $vgpr0_vgpr1 = IMPLICIT_DEF
+    $vgpr2_vgpr3 = IMPLICIT_DEF
+    $vgpr4_vgpr5 = V_FMA_F64_e64 0, 4638355772470722560, 0, $vgpr0_vgpr1, 0, $vgpr2_vgpr3, 0, 0, implicit $mode, implicit $exec
+    SI_RETURN implicit $vgpr4_vgpr5
+...
+
+---
+name: fma_vcv_f64
+body: |
+  bb.0:
+    ; GFX1250-LABEL: name: fma_vcv_f64
+    ; GFX1250: $vgpr0_vgpr1 = IMPLICIT_DEF
+    ; GFX1250-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF
+    ; GFX1250-NEXT: $vgpr4_vgpr5 = V_FMAMK_F64 $vgpr0_vgpr1, 4638355772470722560, $vgpr2_vgpr3, implicit $mode, implicit $exec
+    ; GFX1250-NEXT: SI_RETURN implicit $vgpr4_vgpr5
+    $vgpr0_vgpr1 = IMPLICIT_DEF
+    $vgpr2_vgpr3 = IMPLICIT_DEF
+    $vgpr4_vgpr5 = V_FMA_F64_e64 0, $vgpr0_vgpr1, 0, 4638355772470722560, 0, $vgpr2_vgpr3, 0, 0, implicit $mode, implicit $exec
+    SI_RETURN implicit $vgpr4_vgpr5
+...
+
+---
+name: fma_vvc_f64
+body: |
+  bb.0:
+    ; GFX1250-LABEL: name: fma_vvc_f64
+    ; GFX1250: $vgpr0_vgpr1 = IMPLICIT_DEF
+    ; GFX1250-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF
+    ; GFX1250-NEXT: $vgpr4_vgpr5 = V_FMAAK_F64 $vgpr0_vgpr1, $vgpr2_vgpr3, 4638355772470722560, implicit $mode, implicit $exec
+    ; GFX1250-NEXT: SI_RETURN implicit $vgpr4_vgpr5
+    $vgpr0_vgpr1 = IMPLICIT_DEF
+    $vgpr2_vgpr3 = IMPLICIT_DEF
+    $vgpr4_vgpr5 = V_FMA_F64_e64 0, $vgpr0_vgpr1, 0, $vgpr2_vgpr3, 0, 4638355772470722560, 0, 0, implicit $mode, implicit $exec
+    SI_RETURN implicit $vgpr4_vgpr5
+...
+
+---
+name: fma_vsc_f64
+body: |
+  bb.0:
+    ; GFX1250-LABEL: name: fma_vsc_f64
+    ; GFX1250: $vgpr0_vgpr1 = IMPLICIT_DEF
+    ; GFX1250-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF
+    ; GFX1250-NEXT: $vgpr4_vgpr5 = V_FMAAK_F64 $vgpr0_vgpr1, $vgpr2_vgpr3, 4638355772470722560, implicit $mode, implicit $exec
+    ; GFX1250-NEXT: SI_RETURN implicit $vgpr4_vgpr5
+    $vgpr0_vgpr1 = IMPLICIT_DEF
+    $vgpr2_vgpr3 = IMPLICIT_DEF
+    $vgpr4_vgpr5 = V_FMA_F64_e64 0, $vgpr0_vgpr1, 0, $vgpr2_vgpr3, 0, 4638355772470722560, 0, 0, implicit $mode, implicit $exec
+    SI_RETURN implicit $vgpr4_vgpr5
+...

diff  --git a/llvm/test/CodeGen/AMDGPU/twoaddr-fma-f64.mir b/llvm/test/CodeGen/AMDGPU/twoaddr-fma-f64.mir
index b0a75a526cf2b..316c34ebabcd2 100644
--- a/llvm/test/CodeGen/AMDGPU/twoaddr-fma-f64.mir
+++ b/llvm/test/CodeGen/AMDGPU/twoaddr-fma-f64.mir
@@ -1,8 +1,10 @@
-# RUN: llc -mtriple=amdgcn -mcpu=gfx90a %s -run-pass twoaddressinstruction -verify-machineinstrs -o - | FileCheck -check-prefix=GCN %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx90a %s -run-pass twoaddressinstruction -verify-machineinstrs -o - | FileCheck -check-prefixes=GCN,GFX90A %s
 # RUN: llc -mtriple=amdgcn -mcpu=gfx90a %s --passes=two-address-instruction -verify-each -o - | FileCheck -check-prefix=GCN %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1250 %s -run-pass twoaddressinstruction -o - | FileCheck -check-prefixes=GCN,GFX1250 %s
 
 # GCN-LABEL: name: test_fmamk_reg_imm_f64
-# GCN: V_FMA_F64_e64 0, killed %0, 0, %2, 0, killed %1, 0, 0, implicit $mode, implicit $exec
+# GFX90A: V_FMA_F64_e64 0, killed %0, 0, %2, 0, killed %1, 0, 0, implicit $mode, implicit $exec
+# GFX1250: V_FMAMK_F64 killed %0, 4607182418800017408, killed %1, implicit $mode, implicit $exec
 ---
 name:            test_fmamk_reg_imm_f64
 registers:
@@ -21,7 +23,8 @@ body:             |
 ...
 
 # GCN-LABEL: name: test_fmamk_imm_reg_f64
-# GCN: V_FMA_F64_e64 0, %2, 0, killed %0.sub0_sub1, 0, killed %1, 0, 0, implicit $mode, implicit $exec
+# GFX90A: V_FMA_F64_e64 0, %2, 0, killed %0.sub0_sub1, 0, killed %1, 0, 0, implicit $mode, implicit $exec
+# GFX1250: V_FMAMK_F64 killed %0.sub0_sub1, 4607182418800017408, killed %1, implicit $mode, implicit $exec
 ---
 name:            test_fmamk_imm_reg_f64
 registers:
@@ -40,7 +43,8 @@ body:             |
 ...
 
 # GCN-LABEL: name: test_fmaak_f64
-# GCN: V_FMA_F64_e64 0, killed %0.sub0_sub1, 0, %0.sub2_sub3, 0, %1, 0, 0, implicit $mode, implicit $exec
+# GFX90A: V_FMA_F64_e64 0, killed %0.sub0_sub1, 0, %0.sub2_sub3, 0, %1, 0, 0, implicit $mode, implicit $exec
+# GFX1250: V_FMAAK_F64 killed %0.sub0_sub1, %0.sub2_sub3, 4607182418800017408, implicit $mode, implicit $exec
 ---
 name:            test_fmaak_f64
 registers:
@@ -57,7 +61,8 @@ body:             |
 ...
 
 # GCN-LABEL: name: test_fmaak_sgpr_src0_f64
-# GCN: V_FMA_F64_e64 0, killed %0, 0, %1, 0, %2, 0, 0, implicit $mode, implicit $exec
+# GFX90A: V_FMA_F64_e64 0, killed %0, 0, %1, 0, %2, 0, 0, implicit $mode, implicit $exec
+# GFX1250: V_FMAMK_F64 killed %0, 4607182418800017408, %2, implicit $mode, implicit $exec
 
 ---
 name:            test_fmaak_sgpr_src0_f64
@@ -77,7 +82,8 @@ body:             |
 ...
 
 # GCN-LABEL: name: test_fmaak_inlineimm_src0_f64
-# GCN: V_FMA_F64_e64 0, 4611686018427387904, 0, %0, 0, %1, 0, 0, implicit $mode, implicit $exec
+# GFX90A: V_FMA_F64_e64 0, 4611686018427387904, 0, %0, 0, %1, 0, 0, implicit $mode, implicit $exec
+# GFX1250: V_FMAMK_F64 4611686018427387904, 4607182418800017408, %1, implicit $mode, implicit $exec
 
 ---
 name:            test_fmaak_inlineimm_src0_f64
@@ -95,7 +101,8 @@ body:             |
 ...
 
 # GCN-LABEL: name: test_fmaak_otherimm_src0_f64
-# GCN: V_FMAC_F64_e32 4636737291354636288, %0, %2, implicit $mode, implicit $exec
+# GFX90A: V_FMAC_F64_e32 4636737291354636288, %0, %2, implicit $mode, implicit $exec
+# GFX1250: V_FMAMK_F64 %0, 4636737291354636288, %1, implicit $mode, implicit $exec
 
 ---
 name:            test_fmaak_otherimm_src0_f64
@@ -134,7 +141,8 @@ body:             |
 ...
 
 # GCN-LABEL: name: test_fmamk_reg_unfoldable_literal_src0_f64
-# GCN: V_FMA_F64_e64 0, %2, 0, killed %0, 0, killed %1, 0, 0, implicit $mode, implicit $exec
+# GFX90A: V_FMA_F64_e64 0, %2, 0, killed %0, 0, killed %1, 0, 0, implicit $mode, implicit $exec
+# GFX1250: V_FMAMK_F64 killed %0, 123456, killed %1, implicit $mode, implicit $exec
 ---
 name:            test_fmamk_reg_unfoldable_literal_src0_f64
 registers:
@@ -153,7 +161,8 @@ body:             |
 ...
 
 # GCN-LABEL: name: test_fmamk_reg_unfoldable_literal_src1_f64
-# GCN: V_FMA_F64_e64 0, killed %0, 0, %2, 0, killed %1, 0, 0, implicit $mode, implicit $exec
+# GFX90A: V_FMA_F64_e64 0, killed %0, 0, %2, 0, killed %1, 0, 0, implicit $mode, implicit $exec
+# GFX1250: V_FMAMK_F64 killed %0, 123456, killed %1, implicit $mode, implicit $exec
 ---
 name:            test_fmamk_reg_unfoldable_literal_src1_f64
 registers:
@@ -172,7 +181,8 @@ body:             |
 ...
 
 # GCN-LABEL: name: test_fmaak_reg_unfoldable_literal_src2_f64
-# GCN: V_FMA_F64_e64 0, killed %0, 0, killed %1, 0, %2, 0, 0, implicit $mode, implicit $exec
+# GFX90A: V_FMA_F64_e64 0, killed %0, 0, killed %1, 0, %2, 0, 0, implicit $mode, implicit $exec
+# GFX1250: V_FMAAK_F64 killed %0, killed %1, 123456, implicit $mode, implicit $exec
 ---
 name:            test_fmaak_reg_unfoldable_literal_src2_f64
 registers:


        


More information about the llvm-commits mailing list