[llvm] cbba8f0 - [AMDGPU] Codegen support for v_fmaak_f64/v_fmamk_f64 (#148734)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 14 17:57:10 PDT 2025
Author: Stanislav Mekhanoshin
Date: 2025-07-14T17:57:06-07:00
New Revision: cbba8f0acb537be00f649a640e98f8c92692fe9b
URL: https://github.com/llvm/llvm-project/commit/cbba8f0acb537be00f649a640e98f8c92692fe9b
DIFF: https://github.com/llvm/llvm-project/commit/cbba8f0acb537be00f649a640e98f8c92692fe9b.diff
LOG: [AMDGPU] Codegen support for v_fmaak_f64/v_fmamk_f64 (#148734)
Added:
llvm/test/CodeGen/AMDGPU/shrink-fma-f64.mir
Modified:
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
llvm/test/CodeGen/AMDGPU/literal64.ll
llvm/test/CodeGen/AMDGPU/peephole-fold-imm.mir
llvm/test/CodeGen/AMDGPU/twoaddr-fma-f64.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 9b9291e8ef199..56fbb79c0d805 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3513,6 +3513,10 @@ static unsigned getNewFMAAKInst(const GCNSubtarget &ST, unsigned Opc) {
? AMDGPU::V_FMAAK_F16_t16
: AMDGPU::V_FMAAK_F16_fake16
: AMDGPU::V_FMAAK_F16;
+ case AMDGPU::V_FMAC_F64_e32:
+ case AMDGPU::V_FMAC_F64_e64:
+ case AMDGPU::V_FMA_F64_e64:
+ return AMDGPU::V_FMAAK_F64;
default:
llvm_unreachable("invalid instruction");
}
@@ -3541,6 +3545,10 @@ static unsigned getNewFMAMKInst(const GCNSubtarget &ST, unsigned Opc) {
? AMDGPU::V_FMAMK_F16_t16
: AMDGPU::V_FMAMK_F16_fake16
: AMDGPU::V_FMAMK_F16;
+ case AMDGPU::V_FMAC_F64_e32:
+ case AMDGPU::V_FMAC_F64_e64:
+ case AMDGPU::V_FMA_F64_e64:
+ return AMDGPU::V_FMAMK_F64;
default:
llvm_unreachable("invalid instruction");
}
@@ -3619,7 +3627,8 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 ||
Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
- Opc == AMDGPU::V_FMAC_F16_fake16_e64) {
+ Opc == AMDGPU::V_FMAC_F16_fake16_e64 || Opc == AMDGPU::V_FMA_F64_e64 ||
+ Opc == AMDGPU::V_FMAC_F64_e64) {
// Don't fold if we are using source or output modifiers. The new VOP2
// instructions don't have them.
if (hasAnyModifiersSet(UseMI))
@@ -3691,7 +3700,8 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
- Opc == AMDGPU::V_FMAC_F16_fake16_e64 || Opc == AMDGPU::V_FMAC_F16_e64)
+ Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
+ Opc == AMDGPU::V_FMAC_F16_e64 || Opc == AMDGPU::V_FMAC_F64_e64)
UseMI.untieRegOperand(
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
@@ -3759,7 +3769,8 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
- Opc == AMDGPU::V_FMAC_F16_fake16_e64 || Opc == AMDGPU::V_FMAC_F16_e64)
+ Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
+ Opc == AMDGPU::V_FMAC_F16_e64 || Opc == AMDGPU::V_FMAC_F64_e64)
UseMI.untieRegOperand(
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
@@ -4080,8 +4091,8 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
const MachineOperand *Omod = getNamedOperand(MI, AMDGPU::OpName::omod);
const MachineOperand *OpSel = getNamedOperand(MI, AMDGPU::OpName::op_sel);
- if (!Src0Mods && !Src1Mods && !Src2Mods && !Clamp && !Omod && !IsF64 &&
- !IsLegacy &&
+ if (!Src0Mods && !Src1Mods && !Src2Mods && !Clamp && !Omod && !IsLegacy &&
+ (!IsF64 || ST.hasFmaakFmamkF64Insts()) &&
// If we have an SGPR input, we will violate the constant bus restriction.
(ST.getConstantBusLimit(Opc) > 1 || !Src0->isReg() ||
!RI.isSGPRReg(MBB.getParent()->getRegInfo(), Src0->getReg()))) {
diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index 4a4b865dc5d1d..7a519117f2482 100644
--- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -463,6 +463,10 @@ void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
NewOpcode = AMDGPU::V_FMAAK_F16_fake16;
break;
+ case AMDGPU::V_FMA_F64_e64:
+ if (ST->hasFmaakFmamkF64Insts())
+ NewOpcode = AMDGPU::V_FMAAK_F64;
+ break;
}
}
@@ -497,6 +501,10 @@ void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
NewOpcode = AMDGPU::V_FMAMK_F16_fake16;
break;
+ case AMDGPU::V_FMA_F64_e64:
+ if (ST->hasFmaakFmamkF64Insts())
+ NewOpcode = AMDGPU::V_FMAMK_F64;
+ break;
}
}
@@ -961,7 +969,9 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
MI.getOpcode() == AMDGPU::V_FMA_F16_e64 ||
MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_e64 ||
MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_t16_e64 ||
- MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_fake16_e64) {
+ MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_fake16_e64 ||
+ (MI.getOpcode() == AMDGPU::V_FMA_F64_e64 &&
+ ST->hasFmaakFmamkF64Insts())) {
shrinkMadFma(MI);
continue;
}
diff --git a/llvm/test/CodeGen/AMDGPU/literal64.ll b/llvm/test/CodeGen/AMDGPU/literal64.ll
index bb281bd6b6c12..df4ff2c8d9851 100644
--- a/llvm/test/CodeGen/AMDGPU/literal64.ll
+++ b/llvm/test/CodeGen/AMDGPU/literal64.ll
@@ -256,17 +256,28 @@ define amdgpu_ps <2 x float> @v_lshl_add_u64(i64 %a) {
; No folding into VOP2 promoted to VOP3
define amdgpu_ps <2 x float> @v_fma_f64(double %a, double %b) {
-; GCN-LABEL: v_fma_f64:
-; GCN: ; %bb.0:
-; GCN-NEXT: v_mov_b64_e32 v[4:5], lit64(0x4063233333333333)
-; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GCN-NEXT: v_fmac_f64_e32 v[4:5], v[0:1], v[2:3]
-; GCN-NEXT: v_mov_b64_e32 v[2:3], lit64(0x4069033333333333)
-; GCN-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], v[2:3]
-; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GCN-NEXT: v_fmac_f64_e32 v[2:3], v[0:1], v[4:5]
-; GCN-NEXT: v_dual_mov_b32 v0, v2 :: v_dual_mov_b32 v1, v3
-; GCN-NEXT: ; return to shader part epilog
+; GCN-SDAG-LABEL: v_fma_f64:
+; GCN-SDAG: ; %bb.0:
+; GCN-SDAG-NEXT: v_fmaak_f64 v[4:5], v[0:1], v[2:3], lit64(0x4063233333333333)
+; GCN-SDAG-NEXT: v_mov_b64_e32 v[2:3], lit64(0x4069033333333333)
+; GCN-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GCN-SDAG-NEXT: v_fmaak_f64 v[0:1], v[0:1], v[4:5], lit64(0x4069033333333333)
+; GCN-SDAG-NEXT: v_fmac_f64_e32 v[2:3], v[0:1], v[4:5]
+; GCN-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GCN-SDAG-NEXT: v_dual_mov_b32 v0, v2 :: v_dual_mov_b32 v1, v3
+; GCN-SDAG-NEXT: ; return to shader part epilog
+;
+; GCN-GISEL-LABEL: v_fma_f64:
+; GCN-GISEL: ; %bb.0:
+; GCN-GISEL-NEXT: v_mov_b64_e32 v[4:5], lit64(0x4063233333333333)
+; GCN-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GCN-GISEL-NEXT: v_fmac_f64_e32 v[4:5], v[0:1], v[2:3]
+; GCN-GISEL-NEXT: v_mov_b64_e32 v[2:3], lit64(0x4069033333333333)
+; GCN-GISEL-NEXT: v_fmaak_f64 v[0:1], v[0:1], v[4:5], lit64(0x4069033333333333)
+; GCN-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GCN-GISEL-NEXT: v_fmac_f64_e32 v[2:3], v[0:1], v[4:5]
+; GCN-GISEL-NEXT: v_dual_mov_b32 v0, v2 :: v_dual_mov_b32 v1, v3
+; GCN-GISEL-NEXT: ; return to shader part epilog
%r1 = call double @llvm.fma.f64(double %a, double %b, double 153.1) nounwind readnone
%r2 = call double @llvm.fma.f64(double %a, double %r1, double 200.1) nounwind readnone
%r3 = call double @llvm.fma.f64(double %r2, double %r1, double 200.1) nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/peephole-fold-imm.mir b/llvm/test/CodeGen/AMDGPU/peephole-fold-imm.mir
index afbbb05bfc3a5..764a1e1090181 100644
--- a/llvm/test/CodeGen/AMDGPU/peephole-fold-imm.mir
+++ b/llvm/test/CodeGen/AMDGPU/peephole-fold-imm.mir
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx942 -verify-machineinstrs -run-pass peephole-opt -o - %s | FileCheck -check-prefix=GCN %s
+# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx942 -verify-machineinstrs -run-pass peephole-opt -o - %s | FileCheck -check-prefixes=GCN,GFX942 %s
+# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx1250 -run-pass peephole-opt -o - %s | FileCheck -check-prefixes=GCN,GFX1250 %s
---
name: fold_simm_virtual
@@ -564,6 +565,144 @@ body: |
...
+---
+name: fmac_sreg_64_src0_to_fmamk_f64
+tracksRegLiveness: true
+body: |
+ bb.0:
+
+ ; GFX942-LABEL: name: fmac_sreg_64_src0_to_fmamk_f64
+ ; GFX942: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; GFX942-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; GFX942-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 1311768467750121200
+ ; GFX942-NEXT: [[V_FMAC_F64_e64_:%[0-9]+]]:vreg_64_align2 = V_FMAC_F64_e64 0, [[S_MOV_B]], 0, [[DEF]], 0, [[DEF1]], 0, 0, implicit $mode, implicit $exec
+ ; GFX942-NEXT: SI_RETURN_TO_EPILOG [[V_FMAC_F64_e64_]]
+ ;
+ ; GFX1250-LABEL: name: fmac_sreg_64_src0_to_fmamk_f64
+ ; GFX1250: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; GFX1250-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; GFX1250-NEXT: [[V_FMAMK_F64_:%[0-9]+]]:vreg_64_align2 = V_FMAMK_F64 [[DEF]], 1311768467750121200, [[DEF1]], implicit $mode, implicit $exec
+ ; GFX1250-NEXT: SI_RETURN_TO_EPILOG [[V_FMAMK_F64_]]
+ %0:vreg_64_align2 = IMPLICIT_DEF
+ %1:vreg_64_align2 = IMPLICIT_DEF
+ %2:sreg_64 = S_MOV_B64_IMM_PSEUDO 1311768467750121200
+ %3:vreg_64_align2 = V_FMAC_F64_e64 0, %2, 0, %0, 0, %1, 0, 0, implicit $mode, implicit $exec
+ SI_RETURN_TO_EPILOG %3
+...
+
+---
+name: fmac_sreg_64_src1_to_fmamk_f64
+tracksRegLiveness: true
+body: |
+ bb.0:
+
+ ; GCN-LABEL: name: fmac_sreg_64_src1_to_fmamk_f64
+ ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; GCN-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 1311768467750121200
+ ; GCN-NEXT: [[V_FMAC_F64_e64_:%[0-9]+]]:vreg_64_align2 = V_FMAC_F64_e64 0, [[DEF]], 0, [[DEF1]], 0, [[DEF1]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: SI_RETURN_TO_EPILOG [[V_FMAC_F64_e64_]]
+ %0:vreg_64_align2 = IMPLICIT_DEF
+ %1:vreg_64_align2 = IMPLICIT_DEF
+ %2:sreg_64 = S_MOV_B64_IMM_PSEUDO 1311768467750121200
+ %3:vreg_64_align2 = V_FMAC_F64_e64 0, %0, 0, %1, 0, %1, 0, 0, implicit $mode, implicit $exec
+ SI_RETURN_TO_EPILOG %3
+...
+
+---
+name: fmac_vreg_64_to_fmaak_f64
+tracksRegLiveness: true
+body: |
+ bb.0:
+
+ ; GFX942-LABEL: name: fmac_vreg_64_to_fmaak_f64
+ ; GFX942: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; GFX942-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; GFX942-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 1311768467750121200, implicit $exec
+ ; GFX942-NEXT: [[V_FMAC_F64_e64_:%[0-9]+]]:vreg_64_align2 = V_FMAC_F64_e64 0, [[DEF]], 0, [[DEF1]], 0, [[V_MOV_B]], 0, 0, implicit $mode, implicit $exec
+ ; GFX942-NEXT: SI_RETURN_TO_EPILOG [[V_FMAC_F64_e64_]]
+ ;
+ ; GFX1250-LABEL: name: fmac_vreg_64_to_fmaak_f64
+ ; GFX1250: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; GFX1250-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; GFX1250-NEXT: [[V_FMAAK_F64_:%[0-9]+]]:vreg_64_align2 = V_FMAAK_F64 [[DEF]], [[DEF1]], 1311768467750121200, implicit $mode, implicit $exec
+ ; GFX1250-NEXT: SI_RETURN_TO_EPILOG [[V_FMAAK_F64_]]
+ %0:vreg_64_align2 = IMPLICIT_DEF
+ %1:vreg_64_align2 = IMPLICIT_DEF
+ %2:vreg_64_align2 = V_MOV_B64_PSEUDO 1311768467750121200, implicit $exec
+ %3:vreg_64_align2 = V_FMAC_F64_e64 0, %0, 0, %1, 0, %2, 0, 0, implicit $mode, implicit $exec
+ SI_RETURN_TO_EPILOG %3
+...
+
+---
+name: fma_sreg_64_src0_to_fmamk_f64
+tracksRegLiveness: true
+body: |
+ bb.0:
+
+ ; GFX942-LABEL: name: fma_sreg_64_src0_to_fmamk_f64
+ ; GFX942: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; GFX942-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; GFX942-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 1311768467750121200
+ ; GFX942-NEXT: [[V_FMA_F64_e64_:%[0-9]+]]:vreg_64_align2 = V_FMA_F64_e64 0, [[S_MOV_B]], 0, [[DEF]], 0, [[DEF1]], 0, 0, implicit $mode, implicit $exec
+ ; GFX942-NEXT: SI_RETURN_TO_EPILOG [[V_FMA_F64_e64_]]
+ ;
+ ; GFX1250-LABEL: name: fma_sreg_64_src0_to_fmamk_f64
+ ; GFX1250: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; GFX1250-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; GFX1250-NEXT: [[V_FMAMK_F64_:%[0-9]+]]:vreg_64_align2 = V_FMAMK_F64 [[DEF]], 1311768467750121200, [[DEF1]], implicit $mode, implicit $exec
+ ; GFX1250-NEXT: SI_RETURN_TO_EPILOG [[V_FMAMK_F64_]]
+ %0:vreg_64_align2 = IMPLICIT_DEF
+ %1:vreg_64_align2 = IMPLICIT_DEF
+ %2:sreg_64 = S_MOV_B64_IMM_PSEUDO 1311768467750121200
+ %3:vreg_64_align2 = V_FMA_F64_e64 0, %2, 0, %0, 0, %1, 0, 0, implicit $mode, implicit $exec
+ SI_RETURN_TO_EPILOG %3
+...
+
+---
+name: fma_sreg_64_src1_to_fmamk_f64
+tracksRegLiveness: true
+body: |
+ bb.0:
+
+ ; GCN-LABEL: name: fma_sreg_64_src1_to_fmamk_f64
+ ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; GCN-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO 1311768467750121200
+ ; GCN-NEXT: [[V_FMA_F64_e64_:%[0-9]+]]:vreg_64_align2 = V_FMA_F64_e64 0, [[DEF]], 0, [[DEF1]], 0, [[DEF1]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: SI_RETURN_TO_EPILOG [[V_FMA_F64_e64_]]
+ %0:vreg_64_align2 = IMPLICIT_DEF
+ %1:vreg_64_align2 = IMPLICIT_DEF
+ %2:sreg_64 = S_MOV_B64_IMM_PSEUDO 1311768467750121200
+ %3:vreg_64_align2 = V_FMA_F64_e64 0, %0, 0, %1, 0, %1, 0, 0, implicit $mode, implicit $exec
+ SI_RETURN_TO_EPILOG %3
+...
+
+---
+name: fma_vreg_64_to_fmaak_f64
+tracksRegLiveness: true
+body: |
+ bb.0:
+
+ ; GFX942-LABEL: name: fma_vreg_64_to_fmaak_f64
+ ; GFX942: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; GFX942-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; GFX942-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 1311768467750121200, implicit $exec
+ ; GFX942-NEXT: [[V_FMA_F64_e64_:%[0-9]+]]:vreg_64_align2 = V_FMA_F64_e64 0, [[DEF]], 0, [[DEF1]], 0, [[V_MOV_B]], 0, 0, implicit $mode, implicit $exec
+ ; GFX942-NEXT: SI_RETURN_TO_EPILOG [[V_FMA_F64_e64_]]
+ ;
+ ; GFX1250-LABEL: name: fma_vreg_64_to_fmaak_f64
+ ; GFX1250: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; GFX1250-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; GFX1250-NEXT: [[V_FMAAK_F64_:%[0-9]+]]:vreg_64_align2 = V_FMAAK_F64 [[DEF]], [[DEF1]], 1311768467750121200, implicit $mode, implicit $exec
+ ; GFX1250-NEXT: SI_RETURN_TO_EPILOG [[V_FMAAK_F64_]]
+ %0:vreg_64_align2 = IMPLICIT_DEF
+ %1:vreg_64_align2 = IMPLICIT_DEF
+ %2:vreg_64_align2 = V_MOV_B64_PSEUDO 1311768467750121200, implicit $exec
+ %3:vreg_64_align2 = V_FMA_F64_e64 0, %0, 0, %1, 0, %2, 0, 0, implicit $mode, implicit $exec
+ SI_RETURN_TO_EPILOG %3
+...
+
---
name: fold_v_mov_b32_e32_literal_to_agpr
body: |
diff --git a/llvm/test/CodeGen/AMDGPU/shrink-fma-f64.mir b/llvm/test/CodeGen/AMDGPU/shrink-fma-f64.mir
new file mode 100644
index 0000000000000..be46831d3bfe5
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/shrink-fma-f64.mir
@@ -0,0 +1,62 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -run-pass si-shrink-instructions %s -o - | FileCheck %s -check-prefix=GFX1250
+
+---
+name: fma_cvv_f64
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: fma_cvv_f64
+ ; GFX1250: $vgpr0_vgpr1 = IMPLICIT_DEF
+ ; GFX1250-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF
+ ; GFX1250-NEXT: $vgpr4_vgpr5 = V_FMAMK_F64 $vgpr0_vgpr1, 4638355772470722560, $vgpr2_vgpr3, implicit $mode, implicit $exec
+ ; GFX1250-NEXT: SI_RETURN implicit $vgpr4_vgpr5
+ $vgpr0_vgpr1 = IMPLICIT_DEF
+ $vgpr2_vgpr3 = IMPLICIT_DEF
+ $vgpr4_vgpr5 = V_FMA_F64_e64 0, 4638355772470722560, 0, $vgpr0_vgpr1, 0, $vgpr2_vgpr3, 0, 0, implicit $mode, implicit $exec
+ SI_RETURN implicit $vgpr4_vgpr5
+...
+
+---
+name: fma_vcv_f64
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: fma_vcv_f64
+ ; GFX1250: $vgpr0_vgpr1 = IMPLICIT_DEF
+ ; GFX1250-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF
+ ; GFX1250-NEXT: $vgpr4_vgpr5 = V_FMAMK_F64 $vgpr0_vgpr1, 4638355772470722560, $vgpr2_vgpr3, implicit $mode, implicit $exec
+ ; GFX1250-NEXT: SI_RETURN implicit $vgpr4_vgpr5
+ $vgpr0_vgpr1 = IMPLICIT_DEF
+ $vgpr2_vgpr3 = IMPLICIT_DEF
+ $vgpr4_vgpr5 = V_FMA_F64_e64 0, $vgpr0_vgpr1, 0, 4638355772470722560, 0, $vgpr2_vgpr3, 0, 0, implicit $mode, implicit $exec
+ SI_RETURN implicit $vgpr4_vgpr5
+...
+
+---
+name: fma_vvc_f64
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: fma_vvc_f64
+ ; GFX1250: $vgpr0_vgpr1 = IMPLICIT_DEF
+ ; GFX1250-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF
+ ; GFX1250-NEXT: $vgpr4_vgpr5 = V_FMAAK_F64 $vgpr0_vgpr1, $vgpr2_vgpr3, 4638355772470722560, implicit $mode, implicit $exec
+ ; GFX1250-NEXT: SI_RETURN implicit $vgpr4_vgpr5
+ $vgpr0_vgpr1 = IMPLICIT_DEF
+ $vgpr2_vgpr3 = IMPLICIT_DEF
+ $vgpr4_vgpr5 = V_FMA_F64_e64 0, $vgpr0_vgpr1, 0, $vgpr2_vgpr3, 0, 4638355772470722560, 0, 0, implicit $mode, implicit $exec
+ SI_RETURN implicit $vgpr4_vgpr5
+...
+
+---
+name: fma_vsc_f64
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: fma_vsc_f64
+ ; GFX1250: $vgpr0_vgpr1 = IMPLICIT_DEF
+ ; GFX1250-NEXT: $vgpr2_vgpr3 = IMPLICIT_DEF
+ ; GFX1250-NEXT: $vgpr4_vgpr5 = V_FMAAK_F64 $vgpr0_vgpr1, $vgpr2_vgpr3, 4638355772470722560, implicit $mode, implicit $exec
+ ; GFX1250-NEXT: SI_RETURN implicit $vgpr4_vgpr5
+ $vgpr0_vgpr1 = IMPLICIT_DEF
+ $vgpr2_vgpr3 = IMPLICIT_DEF
+ $vgpr4_vgpr5 = V_FMA_F64_e64 0, $vgpr0_vgpr1, 0, $vgpr2_vgpr3, 0, 4638355772470722560, 0, 0, implicit $mode, implicit $exec
+ SI_RETURN implicit $vgpr4_vgpr5
+...
diff --git a/llvm/test/CodeGen/AMDGPU/twoaddr-fma-f64.mir b/llvm/test/CodeGen/AMDGPU/twoaddr-fma-f64.mir
index b0a75a526cf2b..316c34ebabcd2 100644
--- a/llvm/test/CodeGen/AMDGPU/twoaddr-fma-f64.mir
+++ b/llvm/test/CodeGen/AMDGPU/twoaddr-fma-f64.mir
@@ -1,8 +1,10 @@
-# RUN: llc -mtriple=amdgcn -mcpu=gfx90a %s -run-pass twoaddressinstruction -verify-machineinstrs -o - | FileCheck -check-prefix=GCN %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx90a %s -run-pass twoaddressinstruction -verify-machineinstrs -o - | FileCheck -check-prefixes=GCN,GFX90A %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx90a %s --passes=two-address-instruction -verify-each -o - | FileCheck -check-prefix=GCN %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1250 %s -run-pass twoaddressinstruction -o - | FileCheck -check-prefixes=GCN,GFX1250 %s
# GCN-LABEL: name: test_fmamk_reg_imm_f64
-# GCN: V_FMA_F64_e64 0, killed %0, 0, %2, 0, killed %1, 0, 0, implicit $mode, implicit $exec
+# GFX90A: V_FMA_F64_e64 0, killed %0, 0, %2, 0, killed %1, 0, 0, implicit $mode, implicit $exec
+# GFX1250: V_FMAMK_F64 killed %0, 4607182418800017408, killed %1, implicit $mode, implicit $exec
---
name: test_fmamk_reg_imm_f64
registers:
@@ -21,7 +23,8 @@ body: |
...
# GCN-LABEL: name: test_fmamk_imm_reg_f64
-# GCN: V_FMA_F64_e64 0, %2, 0, killed %0.sub0_sub1, 0, killed %1, 0, 0, implicit $mode, implicit $exec
+# GFX90A: V_FMA_F64_e64 0, %2, 0, killed %0.sub0_sub1, 0, killed %1, 0, 0, implicit $mode, implicit $exec
+# GFX1250: V_FMAMK_F64 killed %0.sub0_sub1, 4607182418800017408, killed %1, implicit $mode, implicit $exec
---
name: test_fmamk_imm_reg_f64
registers:
@@ -40,7 +43,8 @@ body: |
...
# GCN-LABEL: name: test_fmaak_f64
-# GCN: V_FMA_F64_e64 0, killed %0.sub0_sub1, 0, %0.sub2_sub3, 0, %1, 0, 0, implicit $mode, implicit $exec
+# GFX90A: V_FMA_F64_e64 0, killed %0.sub0_sub1, 0, %0.sub2_sub3, 0, %1, 0, 0, implicit $mode, implicit $exec
+# GFX1250: V_FMAAK_F64 killed %0.sub0_sub1, %0.sub2_sub3, 4607182418800017408, implicit $mode, implicit $exec
---
name: test_fmaak_f64
registers:
@@ -57,7 +61,8 @@ body: |
...
# GCN-LABEL: name: test_fmaak_sgpr_src0_f64
-# GCN: V_FMA_F64_e64 0, killed %0, 0, %1, 0, %2, 0, 0, implicit $mode, implicit $exec
+# GFX90A: V_FMA_F64_e64 0, killed %0, 0, %1, 0, %2, 0, 0, implicit $mode, implicit $exec
+# GFX1250: V_FMAMK_F64 killed %0, 4607182418800017408, %2, implicit $mode, implicit $exec
---
name: test_fmaak_sgpr_src0_f64
@@ -77,7 +82,8 @@ body: |
...
# GCN-LABEL: name: test_fmaak_inlineimm_src0_f64
-# GCN: V_FMA_F64_e64 0, 4611686018427387904, 0, %0, 0, %1, 0, 0, implicit $mode, implicit $exec
+# GFX90A: V_FMA_F64_e64 0, 4611686018427387904, 0, %0, 0, %1, 0, 0, implicit $mode, implicit $exec
+# GFX1250: V_FMAMK_F64 4611686018427387904, 4607182418800017408, %1, implicit $mode, implicit $exec
---
name: test_fmaak_inlineimm_src0_f64
@@ -95,7 +101,8 @@ body: |
...
# GCN-LABEL: name: test_fmaak_otherimm_src0_f64
-# GCN: V_FMAC_F64_e32 4636737291354636288, %0, %2, implicit $mode, implicit $exec
+# GFX90A: V_FMAC_F64_e32 4636737291354636288, %0, %2, implicit $mode, implicit $exec
+# GFX1250: V_FMAMK_F64 %0, 4636737291354636288, %1, implicit $mode, implicit $exec
---
name: test_fmaak_otherimm_src0_f64
@@ -134,7 +141,8 @@ body: |
...
# GCN-LABEL: name: test_fmamk_reg_unfoldable_literal_src0_f64
-# GCN: V_FMA_F64_e64 0, %2, 0, killed %0, 0, killed %1, 0, 0, implicit $mode, implicit $exec
+# GFX90A: V_FMA_F64_e64 0, %2, 0, killed %0, 0, killed %1, 0, 0, implicit $mode, implicit $exec
+# GFX1250: V_FMAMK_F64 killed %0, 123456, killed %1, implicit $mode, implicit $exec
---
name: test_fmamk_reg_unfoldable_literal_src0_f64
registers:
@@ -153,7 +161,8 @@ body: |
...
# GCN-LABEL: name: test_fmamk_reg_unfoldable_literal_src1_f64
-# GCN: V_FMA_F64_e64 0, killed %0, 0, %2, 0, killed %1, 0, 0, implicit $mode, implicit $exec
+# GFX90A: V_FMA_F64_e64 0, killed %0, 0, %2, 0, killed %1, 0, 0, implicit $mode, implicit $exec
+# GFX1250: V_FMAMK_F64 killed %0, 123456, killed %1, implicit $mode, implicit $exec
---
name: test_fmamk_reg_unfoldable_literal_src1_f64
registers:
@@ -172,7 +181,8 @@ body: |
...
# GCN-LABEL: name: test_fmaak_reg_unfoldable_literal_src2_f64
-# GCN: V_FMA_F64_e64 0, killed %0, 0, killed %1, 0, %2, 0, 0, implicit $mode, implicit $exec
+# GFX90A: V_FMA_F64_e64 0, killed %0, 0, killed %1, 0, %2, 0, 0, implicit $mode, implicit $exec
+# GFX1250: V_FMAAK_F64 killed %0, killed %1, 123456, implicit $mode, implicit $exec
---
name: test_fmaak_reg_unfoldable_literal_src2_f64
registers:
More information about the llvm-commits mailing list