[llvm] 01b8140 - [AMDGPU] Fix delay alu for VOPD with src2acc
Joe Nash via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 25 10:11:29 PDT 2022
Author: Joe Nash
Date: 2022-10-25T13:11:17-04:00
New Revision: 01b8140d3aac50ad4c00246bddab8646a61016ed
URL: https://github.com/llvm/llvm-project/commit/01b8140d3aac50ad4c00246bddab8646a61016ed
DIFF: https://github.com/llvm/llvm-project/commit/01b8140d3aac50ad4c00246bddab8646a61016ed.diff
LOG: [AMDGPU] Fix delay alu for VOPD with src2acc
V_FMAC_F32 and V_DOT2C_F32_F16 have a dummy src2 operand tied to vdst to
inform passes that the instructions read the dst operand. The VOPD
versions of these instructions lacked the dummy operand, which was a
problem for inserting s_delay_alu.
Introduce the dummy src2 operand on the VOPD versions, and fix the VOPD operand
tracking logic to account for it.
Reviewed By: dp
Differential Revision: https://reviews.llvm.org/D136629
Added:
llvm/test/CodeGen/AMDGPU/vopd-src2acc-delay.mir
Modified:
llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
llvm/lib/Target/AMDGPU/VOP2Instructions.td
llvm/lib/Target/AMDGPU/VOPDInstructions.td
llvm/test/CodeGen/AMDGPU/vopd-combine.mir
llvm/test/MC/AMDGPU/gfx11_asm_vopd_errs.s
Removed:
llvm/test/CodeGen/AMDGPU/vopd-fmac-delay.mir
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index bba4120b9bb07..c32c56b1c8f32 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -8550,7 +8550,7 @@ void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
llvm_unreachable("Unhandled operand type in cvtVOPD");
};
- auto InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII);
+ const auto &InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII);
// MCInst operands are ordered as follows:
// dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
@@ -8560,9 +8560,11 @@ void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
}
for (auto CompIdx : VOPD::COMPONENTS) {
+ const auto &CInfo = InstInfo[CompIdx];
+ bool CompHasSrc2Acc = CInfo.hasSrc2Acc();
auto SrcOperandsNum = InstInfo[CompIdx].getSrcOperandsNum();
for (unsigned SrcIdx = 0; SrcIdx < SrcOperandsNum; ++SrcIdx) {
- addOp(InstInfo[CompIdx].getParsedSrcIndex(SrcIdx));
+ addOp(CInfo.getParsedSrcIndex(SrcIdx, CompHasSrc2Acc));
}
}
}
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 020c75f80dd3c..dda515595e4d9 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -480,10 +480,10 @@ ComponentProps::ComponentProps(const MCInstrDesc &OpDesc) {
assert(TiedIdx == -1 || TiedIdx == Component::DST);
HasSrc2Acc = TiedIdx != -1;
- SrcOperandsNum = OpDesc.getNumOperands() - OpDesc.getNumDefs() - HasSrc2Acc;
+ SrcOperandsNum = OpDesc.getNumOperands() - OpDesc.getNumDefs();
assert(SrcOperandsNum <= Component::MAX_SRC_NUM);
- auto OperandsNum = OpDesc.getNumOperands() - HasSrc2Acc;
+ auto OperandsNum = OpDesc.getNumOperands();
for (unsigned OprIdx = Component::SRC1; OprIdx < OperandsNum; ++OprIdx) {
if (OpDesc.OpInfo[OprIdx].OperandType == AMDGPU::OPERAND_KIMM32) {
MandatoryLiteralIdx = OprIdx;
@@ -500,7 +500,7 @@ unsigned ComponentInfo::getParsedOperandIndex(unsigned OprIdx) const {
auto SrcIdx = OprIdx - Component::DST_NUM;
if (SrcIdx < getSrcOperandsNum())
- return getParsedSrcIndex(SrcIdx);
+ return getParsedSrcIndex(SrcIdx, hasSrc2Acc());
// The specified operand does not exist.
return 0;
@@ -539,8 +539,6 @@ InstInfo::RegIndices InstInfo::getRegIndices(
unsigned Src2Reg = 0;
if (Comp.hasRegularSrcOperand(2))
Src2Reg = GetRegIdx(ComponentIdx, Comp.getSrcIndex(2));
- else if (Comp.hasSrc2Acc())
- Src2Reg = DstReg;
return {DstReg, Src0Reg, Src1Reg, Src2Reg};
}
@@ -557,8 +555,9 @@ VOPD::InstInfo getVOPDInstInfo(unsigned VOPDOpcode,
const auto &OpXDesc = InstrInfo->get(OpX);
const auto &OpYDesc = InstrInfo->get(OpY);
VOPD::ComponentInfo OpXInfo(OpXDesc, VOPD::ComponentKind::COMPONENT_X);
- VOPD::ComponentInfo OpYInfo(OpYDesc, VOPD::ComponentKind::COMPONENT_Y,
- OpXInfo.getSrcOperandsNum());
+ VOPD::ComponentInfo OpYInfo(
+ OpYDesc, VOPD::ComponentKind::COMPONENT_Y, OpXInfo.getSrcOperandsNum(),
+ OpXInfo.getSrcOperandsNum() - OpXInfo.hasSrc2Acc());
return VOPD::InstInfo(OpXInfo, OpYInfo);
}
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index bfab25815c16c..a8642a0d1da85 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -549,18 +549,20 @@ class ComponentLayout {
// OpXMnemo dstX src0X [vsrc1X|imm vsrc1X|vsrc1X imm] '::'
// OpYMnemo dstY src0Y [vsrc1Y|imm vsrc1Y|vsrc1Y imm]
// Each ComponentKind has operand indices defined below.
- static constexpr unsigned PARSED_DST_IDX[] = {1, 1, 4 /* + OpXSrcNum */};
- static constexpr unsigned FIRST_PARSED_SRC_IDX[] = {2, 2,
- 5 /* + OpXSrcNum */};
+ static constexpr unsigned PARSED_DST_IDX[] = {1, 1,
+ 4 /* + ParsedOpXSrcNum */};
+ static constexpr unsigned FIRST_PARSED_SRC_IDX[] = {
+ 2, 2, 5 /* + ParsedOpXSrcNum */};
private:
ComponentKind Kind;
unsigned OpXSrcNum;
+ unsigned ParsedOpXSrcNum;
public:
- ComponentLayout(ComponentKind Kind_ = ComponentKind::SINGLE,
- unsigned OpXSrcNum_ = 0)
- : Kind(Kind_), OpXSrcNum(OpXSrcNum_) {
+ ComponentLayout(ComponentKind Kind = ComponentKind::SINGLE,
+ unsigned OpXSrcNum = 0, unsigned ParsedOpXSrcNum = 0)
+ : Kind(Kind), OpXSrcNum(OpXSrcNum), ParsedOpXSrcNum(ParsedOpXSrcNum) {
assert(Kind <= ComponentKind::MAX);
assert((Kind == ComponentKind::COMPONENT_Y) == (OpXSrcNum > 0));
}
@@ -573,11 +575,15 @@ class ComponentLayout {
}
unsigned getParsedDstIndex() const {
- return PARSED_DST_IDX[Kind] + OpXSrcNum;
+ return PARSED_DST_IDX[Kind] + ParsedOpXSrcNum;
}
- unsigned getParsedSrcIndex(unsigned SrcIdx) const {
+ unsigned getParsedSrcIndex(unsigned SrcIdx, bool ComponentHasSrc2Acc) const {
assert(SrcIdx < Component::MAX_SRC_NUM);
- return FIRST_PARSED_SRC_IDX[Kind] + OpXSrcNum + SrcIdx;
+ // FMAC and DOT2C have a src2 operand on the MCInst but
+ // not on the asm representation. src2 is tied to dst.
+ if (ComponentHasSrc2Acc && SrcIdx == (MAX_SRC_NUM - 1))
+ return getParsedDstIndex();
+ return FIRST_PARSED_SRC_IDX[Kind] + ParsedOpXSrcNum + SrcIdx;
}
};
@@ -616,8 +622,9 @@ class ComponentInfo : public ComponentLayout, public ComponentProps {
public:
ComponentInfo(const MCInstrDesc &OpDesc,
ComponentKind Kind = ComponentKind::SINGLE,
- unsigned OpXSrcNum = 0)
- : ComponentLayout(Kind, OpXSrcNum), ComponentProps(OpDesc) {}
+ unsigned OpXSrcNum = 0, unsigned ParsedOpXSrcNum = 0)
+ : ComponentLayout(Kind, OpXSrcNum, ParsedOpXSrcNum),
+ ComponentProps(OpDesc) {}
// Map MC operand index to parsed operand index.
// Return 0 if the specified operand does not exist.
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index 507bac440389e..fc074789ce359 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -427,6 +427,15 @@ class VOP_MAC <ValueType vt0, ValueType vt1=vt0> : VOPProfile <[vt0, vt1, vt1, v
let InsVOP3Base = getIns64<Src0VOP3DPP, Src1RC64, RegisterOperand<VGPR_32>, 3,
0, HasModifiers, HasModifiers, HasOMod,
Src0Mod, Src1Mod, Src2Mod>.ret;
+ // We need a dummy src2 tied to dst to track the use of that register for s_delay_alu
+ let InsVOPDX = (ins Src0RC32:$src0X, Src1RC32:$vsrc1X, VGPRSrc_32:$src2X);
+ let InsVOPDXDeferred =
+ (ins !if(!eq(Src0VT.Size, 32), VSrc_f32_Deferred, VSrc_f16_Deferred):$src0X,
+ VGPR_32:$vsrc1X, VGPRSrc_32:$src2X);
+ let InsVOPDY = (ins Src0RC32:$src0Y, Src1RC32:$vsrc1Y, VGPRSrc_32:$src2Y);
+ let InsVOPDYDeferred =
+ (ins !if(!eq(Src1VT.Size, 32), VSrc_f32_Deferred, VSrc_f16_Deferred):$src0Y,
+ VGPR_32:$vsrc1Y, VGPRSrc_32:$src2Y);
let InsDPP8 = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
diff --git a/llvm/lib/Target/AMDGPU/VOPDInstructions.td b/llvm/lib/Target/AMDGPU/VOPDInstructions.td
index 420f184360957..eb2e9f04022ed 100644
--- a/llvm/lib/Target/AMDGPU/VOPDInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPDInstructions.td
@@ -76,6 +76,21 @@ class VOPD_Base<dag outs, dag ins, string asm, VOP_Pseudo VDX, VOP_Pseudo VDY,
let ReadsModeReg = !or(VDX.ReadsModeReg, VDY.ReadsModeReg);
let mayRaiseFPException = ReadsModeReg;
+ // V_DUAL_FMAC and V_DUAL_DOT2ACC_F32_F16 need a dummy src2 tied to dst for
+ // passes to track its uses. Its presence does not affect VOPD formation rules
+ // because the rules for src2 and dst are the same. src2X and src2Y should not
+ // be encoded.
+ bit hasSrc2AccX = !or(!eq(VDX.Mnemonic, "v_fmac_f32"), !eq(VDX.Mnemonic, "v_dot2c_f32_f16"));
+ bit hasSrc2AccY = !or(!eq(VDY.Mnemonic, "v_fmac_f32"), !eq(VDY.Mnemonic, "v_dot2c_f32_f16"));
+ string ConstraintsX = !if(hasSrc2AccX, "$src2X = $vdstX", "");
+ string ConstraintsY = !if(hasSrc2AccY, "$src2Y = $vdstY", "");
+ let Constraints =
+ ConstraintsX # !if(!and(hasSrc2AccX, hasSrc2AccY), ", ", "") # ConstraintsY;
+ string DisableEncodingX = !if(hasSrc2AccX, "$src2X", "");
+ string DisableEncodingY = !if(hasSrc2AccY, "$src2Y", "");
+ let DisableEncoding =
+ DisableEncodingX # !if(!and(hasSrc2AccX, hasSrc2AccY), ", ", "") # DisableEncodingY;
+
let Uses = RegListUnion<VDX.Uses, VDY.Uses>.ret;
let Defs = RegListUnion<VDX.Defs, VDY.Defs>.ret;
let SchedRW = !listconcat(VDX.SchedRW, VDY.SchedRW);
diff --git a/llvm/test/CodeGen/AMDGPU/vopd-combine.mir b/llvm/test/CodeGen/AMDGPU/vopd-combine.mir
index 47661f4c5953f..33c40b59eaaca 100644
--- a/llvm/test/CodeGen/AMDGPU/vopd-combine.mir
+++ b/llvm/test/CodeGen/AMDGPU/vopd-combine.mir
@@ -68,7 +68,7 @@ body: |
; PAIR-NEXT: $vgpr0 = IMPLICIT_DEF
; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF
- ; PAIR-NEXT: $vgpr5, $vgpr2 = V_DUAL_FMAMK_F32_X_FMAC_F32_e32 killed $vgpr0, 10, killed $vgpr3, killed $vgpr1, $vgpr1, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+ ; PAIR-NEXT: $vgpr5, $vgpr2 = V_DUAL_FMAMK_F32_X_FMAC_F32_e32 killed $vgpr0, 10, killed $vgpr3, killed $vgpr1, $vgpr1, killed $vgpr2, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec
$vgpr0 = IMPLICIT_DEF
$vgpr1 = IMPLICIT_DEF
$vgpr2 = IMPLICIT_DEF
@@ -133,7 +133,7 @@ body: |
; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF
; PAIR-NEXT: $sgpr20 = IMPLICIT_DEF
; PAIR-NEXT: $vgpr4 = V_FMAMK_F32 $sgpr20, 12345, $vgpr3, implicit $mode, implicit $exec
- ; PAIR-NEXT: $vgpr2, $vgpr5 = V_DUAL_FMAC_F32_e32_X_CNDMASK_B32_e32 $sgpr20, killed $vgpr1, $vgpr0, $vgpr3, implicit $exec, implicit $vcc, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $vcc
+ ; PAIR-NEXT: $vgpr2, $vgpr5 = V_DUAL_FMAC_F32_e32_X_CNDMASK_B32_e32 $sgpr20, killed $vgpr1, killed $vgpr2, $vgpr0, $vgpr3, implicit $exec, implicit $vcc, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $vcc
; PAIR-NEXT: $vgpr7 = V_CNDMASK_B32_e32 killed $vgpr0, $vgpr3, implicit $mode, implicit $exec, implicit $vcc
; PAIR-NEXT: $vgpr6 = V_ADD_F32_e32 $sgpr20, $vgpr3, implicit $mode, implicit $exec
; PAIR-NEXT: $vgpr9 = V_CNDMASK_B32_e32 killed $sgpr20, killed $vgpr3, implicit $mode, implicit $exec, implicit $vcc
@@ -246,7 +246,7 @@ body: |
; PAIR-NEXT: $vgpr0 = IMPLICIT_DEF
; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF
- ; PAIR-NEXT: $vgpr5, $vgpr2 = V_DUAL_FMAMK_F32_X_FMAC_F32_e32 killed $vgpr0, 100, killed $vgpr3, 4, killed $vgpr1, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+ ; PAIR-NEXT: $vgpr5, $vgpr2 = V_DUAL_FMAMK_F32_X_FMAC_F32_e32 killed $vgpr0, 100, killed $vgpr3, 4, killed $vgpr1, killed $vgpr2, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec
$vgpr0 = IMPLICIT_DEF
$vgpr1 = IMPLICIT_DEF
$vgpr2 = IMPLICIT_DEF
@@ -276,7 +276,7 @@ body: |
; PAIR-NEXT: $vgpr0 = IMPLICIT_DEF
; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF
- ; PAIR-NEXT: $vgpr5, $vgpr2 = V_DUAL_FMAMK_F32_X_FMAC_F32_e32 killed $vgpr0, 100, killed $vgpr3, 100, killed $vgpr1, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+ ; PAIR-NEXT: $vgpr5, $vgpr2 = V_DUAL_FMAMK_F32_X_FMAC_F32_e32 killed $vgpr0, 100, killed $vgpr3, 100, killed $vgpr1, killed $vgpr2, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec
$vgpr0 = IMPLICIT_DEF
$vgpr1 = IMPLICIT_DEF
$vgpr2 = IMPLICIT_DEF
@@ -447,7 +447,7 @@ body: |
; PAIR-NEXT: $vgpr2 = V_FMAC_F32_e32 10, $vgpr1, killed $vgpr2, implicit $mode, implicit $exec
; PAIR-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
; PAIR-NEXT: $vgpr4, $vgpr29 = V_DUAL_SUB_F32_e32_X_CNDMASK_B32_e32 $vgpr1, $vgpr1, $vgpr0, $vgpr3, implicit $exec, implicit $vcc, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $vcc
- ; PAIR-NEXT: $vgpr19, $vgpr20 = V_DUAL_CNDMASK_B32_e32_X_FMAC_F32_e32 $vgpr0, $vgpr3, 10, $vgpr1, implicit $exec, implicit $mode, implicit $vcc, implicit $mode, implicit $exec, implicit $vcc, implicit $mode, implicit $exec
+ ; PAIR-NEXT: $vgpr19, $vgpr20 = V_DUAL_CNDMASK_B32_e32_X_FMAC_F32_e32 $vgpr0, $vgpr3, 10, $vgpr1, killed $vgpr20, implicit $exec, implicit $mode, implicit $vcc, implicit $mode, implicit $exec, implicit $vcc, implicit $mode, implicit $exec
; PAIR-NEXT: $vgpr15 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec, implicit $vcc
; PAIR-NEXT: $vgpr10, $vgpr17 = V_DUAL_CNDMASK_B32_e32_X_MUL_F32_e32 $vgpr1, $vgpr2, $vgpr0, $vgpr0, implicit $exec, implicit $mode, implicit $vcc, implicit $mode, implicit $exec, implicit $vcc, implicit $mode, implicit $exec
; PAIR-NEXT: $vgpr11, $vgpr12 = V_DUAL_CNDMASK_B32_e32_X_ADD_F32_e32 $vgpr0, $vgpr3, $vgpr1, $vgpr1, implicit $exec, implicit $mode, implicit $vcc, implicit $mode, implicit $exec, implicit $vcc, implicit $mode, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/vopd-fmac-delay.mir b/llvm/test/CodeGen/AMDGPU/vopd-fmac-delay.mir
deleted file mode 100644
index e1f1d45bc7ea3..0000000000000
--- a/llvm/test/CodeGen/AMDGPU/vopd-fmac-delay.mir
+++ /dev/null
@@ -1,28 +0,0 @@
-# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs -run-pass=gcn-create-vopd,amdgpu-insert-delay-alu %s -o - | FileCheck %s
-
-# FIXME: Second VOPD pair reads vgpr0 and vgpr1 written by first pair, so there
-# should be a delay.
----
-name: vopd_fmac_fmac
-tracksRegLiveness: true
-body: |
- bb.0:
- ; CHECK-LABEL: name: vopd_fmac_fmac
- ; CHECK: $vgpr0 = IMPLICIT_DEF
- ; CHECK-NEXT: $vgpr1 = IMPLICIT_DEF
- ; CHECK-NEXT: $vgpr2 = IMPLICIT_DEF
- ; CHECK-NEXT: $vgpr3 = IMPLICIT_DEF
- ; CHECK-NEXT: $vgpr4 = IMPLICIT_DEF
- ; CHECK-NEXT: $vgpr0, $vgpr1 = V_DUAL_FMAC_F32_e32_X_FMAC_F32_e32 $vgpr2, $vgpr3, $vgpr3, $vgpr4, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec
- ; CHECK-NEXT: $vgpr0, $vgpr1 = V_DUAL_FMAC_F32_e32_X_FMAC_F32_e32 $vgpr2, $vgpr3, $vgpr3, $vgpr4, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec
- $vgpr0 = IMPLICIT_DEF
- $vgpr1 = IMPLICIT_DEF
- $vgpr2 = IMPLICIT_DEF
- $vgpr3 = IMPLICIT_DEF
- $vgpr4 = IMPLICIT_DEF
- $vgpr0 = V_FMAC_F32_e32 $vgpr2, $vgpr3, $vgpr0, implicit $mode, implicit $exec
- $vgpr1 = V_FMAC_F32_e32 $vgpr3, $vgpr4, $vgpr1, implicit $mode, implicit $exec
- $vgpr0 = V_FMAC_F32_e32 $vgpr2, $vgpr3, $vgpr0, implicit $mode, implicit $exec
- $vgpr1 = V_FMAC_F32_e32 $vgpr3, $vgpr4, $vgpr1, implicit $mode, implicit $exec
-...
diff --git a/llvm/test/CodeGen/AMDGPU/vopd-src2acc-delay.mir b/llvm/test/CodeGen/AMDGPU/vopd-src2acc-delay.mir
new file mode 100644
index 0000000000000..9b08ef78d1b01
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/vopd-src2acc-delay.mir
@@ -0,0 +1,51 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs -run-pass=gcn-create-vopd,amdgpu-insert-delay-alu %s -o - | FileCheck %s
+
+---
+name: vopd_fmac_fmac
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: vopd_fmac_fmac
+ ; CHECK: $vgpr0 = IMPLICIT_DEF
+ ; CHECK-NEXT: $vgpr1 = IMPLICIT_DEF
+ ; CHECK-NEXT: $vgpr2 = IMPLICIT_DEF
+ ; CHECK-NEXT: $vgpr3 = IMPLICIT_DEF
+ ; CHECK-NEXT: $vgpr4 = IMPLICIT_DEF
+ ; CHECK-NEXT: $vgpr0, $vgpr1 = V_DUAL_FMAC_F32_e32_X_FMAC_F32_e32 $vgpr2, $vgpr3, $vgpr0, $vgpr3, $vgpr4, $vgpr1, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+ ; CHECK-NEXT: S_DELAY_ALU 1
+ ; CHECK-NEXT: $vgpr0, $vgpr1 = V_DUAL_FMAC_F32_e32_X_FMAC_F32_e32 $vgpr2, $vgpr3, $vgpr0, $vgpr3, $vgpr4, $vgpr1, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+ $vgpr0 = IMPLICIT_DEF
+ $vgpr1 = IMPLICIT_DEF
+ $vgpr2 = IMPLICIT_DEF
+ $vgpr3 = IMPLICIT_DEF
+ $vgpr4 = IMPLICIT_DEF
+ $vgpr0 = V_FMAC_F32_e32 $vgpr2, $vgpr3, $vgpr0, implicit $mode, implicit $exec
+ $vgpr1 = V_FMAC_F32_e32 $vgpr3, $vgpr4, $vgpr1, implicit $mode, implicit $exec
+ $vgpr0 = V_FMAC_F32_e32 $vgpr2, $vgpr3, $vgpr0, implicit $mode, implicit $exec
+ $vgpr1 = V_FMAC_F32_e32 $vgpr3, $vgpr4, $vgpr1, implicit $mode, implicit $exec
+...
+---
+name: vopd_dot2c_dot2c
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: vopd_dot2c_dot2c
+ ; CHECK: $vgpr0 = IMPLICIT_DEF
+ ; CHECK-NEXT: $vgpr1 = IMPLICIT_DEF
+ ; CHECK-NEXT: $vgpr2 = IMPLICIT_DEF
+ ; CHECK-NEXT: $vgpr3 = IMPLICIT_DEF
+ ; CHECK-NEXT: $vgpr4 = IMPLICIT_DEF
+ ; CHECK-NEXT: $vgpr0, $vgpr1 = V_DUAL_DOT2C_F32_F16_e32_X_DOT2C_F32_F16_e32 $vgpr2, $vgpr3, $vgpr0, $vgpr3, $vgpr4, $vgpr1, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+ ; CHECK-NEXT: S_DELAY_ALU 1
+ ; CHECK-NEXT: $vgpr0, $vgpr1 = V_DUAL_DOT2C_F32_F16_e32_X_DOT2C_F32_F16_e32 $vgpr2, $vgpr3, $vgpr0, $vgpr3, $vgpr4, $vgpr1, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+ $vgpr0 = IMPLICIT_DEF
+ $vgpr1 = IMPLICIT_DEF
+ $vgpr2 = IMPLICIT_DEF
+ $vgpr3 = IMPLICIT_DEF
+ $vgpr4 = IMPLICIT_DEF
+ $vgpr0 = V_DOT2C_F32_F16_e32 $vgpr2, $vgpr3, $vgpr0, implicit $mode, implicit $exec
+ $vgpr1 = V_DOT2C_F32_F16_e32 $vgpr3, $vgpr4, $vgpr1, implicit $mode, implicit $exec
+ $vgpr0 = V_DOT2C_F32_F16_e32 $vgpr2, $vgpr3, $vgpr0, implicit $mode, implicit $exec
+ $vgpr1 = V_DOT2C_F32_F16_e32 $vgpr3, $vgpr4, $vgpr1, implicit $mode, implicit $exec
+...
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vopd_errs.s b/llvm/test/MC/AMDGPU/gfx11_asm_vopd_errs.s
index f1682c5636ef0..3866cb458dbb0 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vopd_errs.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vopd_errs.s
@@ -266,4 +266,4 @@ v_dual_fmac_f32 v7, v1, v2 :: v_dual_fmamk_f32 v6, v2
v_dual_fmamk_f32 v6, v1, 0xaf123456, v3 :: v_dual_fmac_f32 v5, v2, v3
// GFX11: error: src2 operands must use different VGPR banks
// GFX11-NEXT:{{^}}v_dual_fmamk_f32 v6, v1, 0xaf123456, v3 :: v_dual_fmac_f32 v5, v2, v3
-// GFX11-NEXT:{{^}} ^
+// GFX11-NEXT:{{^}} ^
More information about the llvm-commits
mailing list