[llvm] 01b8140 - [AMDGPU] Fix delay alu for VOPD with src2acc

Joe Nash via llvm-commits llvm-commits at lists.llvm.org
Tue Oct 25 10:11:29 PDT 2022


Author: Joe Nash
Date: 2022-10-25T13:11:17-04:00
New Revision: 01b8140d3aac50ad4c00246bddab8646a61016ed

URL: https://github.com/llvm/llvm-project/commit/01b8140d3aac50ad4c00246bddab8646a61016ed
DIFF: https://github.com/llvm/llvm-project/commit/01b8140d3aac50ad4c00246bddab8646a61016ed.diff

LOG: [AMDGPU] Fix delay alu for VOPD with src2acc

V_FMAC_F32 and V_DOT2C_F32_F16 have a dummy src2 operand tied to vdst to
inform passes that the instructions read the dst operand. The VOPD
versions of these instructions lacked the dummy operand, which was a
problem for inserting s_delay_alu.
Introduce the dummy src2 operand on the VOPD versions, and fix the VOPD operand
tracking logic to account for it.

Reviewed By: dp

Differential Revision: https://reviews.llvm.org/D136629

Added: 
    llvm/test/CodeGen/AMDGPU/vopd-src2acc-delay.mir

Modified: 
    llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
    llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
    llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
    llvm/lib/Target/AMDGPU/VOP2Instructions.td
    llvm/lib/Target/AMDGPU/VOPDInstructions.td
    llvm/test/CodeGen/AMDGPU/vopd-combine.mir
    llvm/test/MC/AMDGPU/gfx11_asm_vopd_errs.s

Removed: 
    llvm/test/CodeGen/AMDGPU/vopd-fmac-delay.mir


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index bba4120b9bb07..c32c56b1c8f32 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -8550,7 +8550,7 @@ void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
     llvm_unreachable("Unhandled operand type in cvtVOPD");
   };
 
-  auto InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII);
+  const auto &InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII);
 
   // MCInst operands are ordered as follows:
   //   dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
@@ -8560,9 +8560,11 @@ void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
   }
 
   for (auto CompIdx : VOPD::COMPONENTS) {
+    const auto &CInfo = InstInfo[CompIdx];
+    bool CompHasSrc2Acc = CInfo.hasSrc2Acc();
     auto SrcOperandsNum = InstInfo[CompIdx].getSrcOperandsNum();
     for (unsigned SrcIdx = 0; SrcIdx < SrcOperandsNum; ++SrcIdx) {
-      addOp(InstInfo[CompIdx].getParsedSrcIndex(SrcIdx));
+      addOp(CInfo.getParsedSrcIndex(SrcIdx, CompHasSrc2Acc));
     }
   }
 }

diff  --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 020c75f80dd3c..dda515595e4d9 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -480,10 +480,10 @@ ComponentProps::ComponentProps(const MCInstrDesc &OpDesc) {
   assert(TiedIdx == -1 || TiedIdx == Component::DST);
   HasSrc2Acc = TiedIdx != -1;
 
-  SrcOperandsNum = OpDesc.getNumOperands() - OpDesc.getNumDefs() - HasSrc2Acc;
+  SrcOperandsNum = OpDesc.getNumOperands() - OpDesc.getNumDefs();
   assert(SrcOperandsNum <= Component::MAX_SRC_NUM);
 
-  auto OperandsNum = OpDesc.getNumOperands() - HasSrc2Acc;
+  auto OperandsNum = OpDesc.getNumOperands();
   for (unsigned OprIdx = Component::SRC1; OprIdx < OperandsNum; ++OprIdx) {
     if (OpDesc.OpInfo[OprIdx].OperandType == AMDGPU::OPERAND_KIMM32) {
       MandatoryLiteralIdx = OprIdx;
@@ -500,7 +500,7 @@ unsigned ComponentInfo::getParsedOperandIndex(unsigned OprIdx) const {
 
   auto SrcIdx = OprIdx - Component::DST_NUM;
   if (SrcIdx < getSrcOperandsNum())
-    return getParsedSrcIndex(SrcIdx);
+    return getParsedSrcIndex(SrcIdx, hasSrc2Acc());
 
   // The specified operand does not exist.
   return 0;
@@ -539,8 +539,6 @@ InstInfo::RegIndices InstInfo::getRegIndices(
   unsigned Src2Reg = 0;
   if (Comp.hasRegularSrcOperand(2))
     Src2Reg = GetRegIdx(ComponentIdx, Comp.getSrcIndex(2));
-  else if (Comp.hasSrc2Acc())
-    Src2Reg = DstReg;
 
   return {DstReg, Src0Reg, Src1Reg, Src2Reg};
 }
@@ -557,8 +555,9 @@ VOPD::InstInfo getVOPDInstInfo(unsigned VOPDOpcode,
   const auto &OpXDesc = InstrInfo->get(OpX);
   const auto &OpYDesc = InstrInfo->get(OpY);
   VOPD::ComponentInfo OpXInfo(OpXDesc, VOPD::ComponentKind::COMPONENT_X);
-  VOPD::ComponentInfo OpYInfo(OpYDesc, VOPD::ComponentKind::COMPONENT_Y,
-                              OpXInfo.getSrcOperandsNum());
+  VOPD::ComponentInfo OpYInfo(
+      OpYDesc, VOPD::ComponentKind::COMPONENT_Y, OpXInfo.getSrcOperandsNum(),
+      OpXInfo.getSrcOperandsNum() - OpXInfo.hasSrc2Acc());
   return VOPD::InstInfo(OpXInfo, OpYInfo);
 }
 

diff  --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index bfab25815c16c..a8642a0d1da85 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -549,18 +549,20 @@ class ComponentLayout {
   //   OpXMnemo dstX src0X [vsrc1X|imm vsrc1X|vsrc1X imm] '::'
   //   OpYMnemo dstY src0Y [vsrc1Y|imm vsrc1Y|vsrc1Y imm]
   // Each ComponentKind has operand indices defined below.
-  static constexpr unsigned PARSED_DST_IDX[] = {1, 1, 4 /* + OpXSrcNum */};
-  static constexpr unsigned FIRST_PARSED_SRC_IDX[] = {2, 2,
-                                                      5 /* + OpXSrcNum */};
+  static constexpr unsigned PARSED_DST_IDX[] = {1, 1,
+                                                4 /* + ParsedOpXSrcNum */};
+  static constexpr unsigned FIRST_PARSED_SRC_IDX[] = {
+      2, 2, 5 /* + ParsedOpXSrcNum */};
 
 private:
   ComponentKind Kind;
   unsigned OpXSrcNum;
+  unsigned ParsedOpXSrcNum;
 
 public:
-  ComponentLayout(ComponentKind Kind_ = ComponentKind::SINGLE,
-                  unsigned OpXSrcNum_ = 0)
-      : Kind(Kind_), OpXSrcNum(OpXSrcNum_) {
+  ComponentLayout(ComponentKind Kind = ComponentKind::SINGLE,
+                  unsigned OpXSrcNum = 0, unsigned ParsedOpXSrcNum = 0)
+      : Kind(Kind), OpXSrcNum(OpXSrcNum), ParsedOpXSrcNum(ParsedOpXSrcNum) {
     assert(Kind <= ComponentKind::MAX);
     assert((Kind == ComponentKind::COMPONENT_Y) == (OpXSrcNum > 0));
   }
@@ -573,11 +575,15 @@ class ComponentLayout {
   }
 
   unsigned getParsedDstIndex() const {
-    return PARSED_DST_IDX[Kind] + OpXSrcNum;
+    return PARSED_DST_IDX[Kind] + ParsedOpXSrcNum;
   }
-  unsigned getParsedSrcIndex(unsigned SrcIdx) const {
+  unsigned getParsedSrcIndex(unsigned SrcIdx, bool ComponentHasSrc2Acc) const {
     assert(SrcIdx < Component::MAX_SRC_NUM);
-    return FIRST_PARSED_SRC_IDX[Kind] + OpXSrcNum + SrcIdx;
+    // FMAC and DOT2C have a src2 operand on the MCInst but
+    // not on the asm representation. src2 is tied to dst.
+    if (ComponentHasSrc2Acc && SrcIdx == (MAX_SRC_NUM - 1))
+      return getParsedDstIndex();
+    return FIRST_PARSED_SRC_IDX[Kind] + ParsedOpXSrcNum + SrcIdx;
   }
 };
 
@@ -616,8 +622,9 @@ class ComponentInfo : public ComponentLayout, public ComponentProps {
 public:
   ComponentInfo(const MCInstrDesc &OpDesc,
                 ComponentKind Kind = ComponentKind::SINGLE,
-                unsigned OpXSrcNum = 0)
-      : ComponentLayout(Kind, OpXSrcNum), ComponentProps(OpDesc) {}
+                unsigned OpXSrcNum = 0, unsigned ParsedOpXSrcNum = 0)
+      : ComponentLayout(Kind, OpXSrcNum, ParsedOpXSrcNum),
+        ComponentProps(OpDesc) {}
 
   // Map MC operand index to parsed operand index.
   // Return 0 if the specified operand does not exist.

diff  --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index 507bac440389e..fc074789ce359 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -427,6 +427,15 @@ class VOP_MAC <ValueType vt0, ValueType vt1=vt0> : VOPProfile <[vt0, vt1, vt1, v
   let InsVOP3Base  = getIns64<Src0VOP3DPP, Src1RC64, RegisterOperand<VGPR_32>, 3,
                        0, HasModifiers, HasModifiers, HasOMod,
                        Src0Mod, Src1Mod, Src2Mod>.ret;
+  // We need a dummy src2 tied to dst to track the use of that register for s_delay_alu
+  let InsVOPDX = (ins Src0RC32:$src0X, Src1RC32:$vsrc1X, VGPRSrc_32:$src2X);
+  let InsVOPDXDeferred =
+    (ins !if(!eq(Src0VT.Size, 32), VSrc_f32_Deferred, VSrc_f16_Deferred):$src0X,
+         VGPR_32:$vsrc1X, VGPRSrc_32:$src2X);
+  let InsVOPDY = (ins Src0RC32:$src0Y, Src1RC32:$vsrc1Y, VGPRSrc_32:$src2Y);
+  let InsVOPDYDeferred =
+    (ins !if(!eq(Src1VT.Size, 32), VSrc_f32_Deferred, VSrc_f16_Deferred):$src0Y,
+         VGPR_32:$vsrc1Y, VGPRSrc_32:$src2Y);
 
   let InsDPP8 = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
                      Src1ModDPP:$src1_modifiers, Src1DPP:$src1,

diff  --git a/llvm/lib/Target/AMDGPU/VOPDInstructions.td b/llvm/lib/Target/AMDGPU/VOPDInstructions.td
index 420f184360957..eb2e9f04022ed 100644
--- a/llvm/lib/Target/AMDGPU/VOPDInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPDInstructions.td
@@ -76,6 +76,21 @@ class VOPD_Base<dag outs, dag ins, string asm, VOP_Pseudo VDX, VOP_Pseudo VDY,
   let ReadsModeReg = !or(VDX.ReadsModeReg, VDY.ReadsModeReg);
   let mayRaiseFPException = ReadsModeReg;
 
+  // V_DUAL_FMAC and V_DUAL_DOT2ACC_F32_F16 need a dummy src2 tied to dst for
+  // passes to track its uses. Its presence does not affect VOPD formation rules
+  // because the rules for src2 and dst are the same. src2X and src2Y should not
+  // be encoded.
+  bit hasSrc2AccX = !or(!eq(VDX.Mnemonic, "v_fmac_f32"), !eq(VDX.Mnemonic, "v_dot2c_f32_f16"));
+  bit hasSrc2AccY = !or(!eq(VDY.Mnemonic, "v_fmac_f32"), !eq(VDY.Mnemonic, "v_dot2c_f32_f16"));
+  string ConstraintsX = !if(hasSrc2AccX, "$src2X = $vdstX", "");
+  string ConstraintsY = !if(hasSrc2AccY, "$src2Y = $vdstY", "");
+  let Constraints =
+      ConstraintsX # !if(!and(hasSrc2AccX, hasSrc2AccY), ", ", "") # ConstraintsY;
+  string DisableEncodingX = !if(hasSrc2AccX, "$src2X", "");
+  string DisableEncodingY = !if(hasSrc2AccY, "$src2Y", "");
+  let DisableEncoding =
+      DisableEncodingX # !if(!and(hasSrc2AccX, hasSrc2AccY), ", ", "") # DisableEncodingY;
+
   let Uses = RegListUnion<VDX.Uses, VDY.Uses>.ret;
   let Defs = RegListUnion<VDX.Defs, VDY.Defs>.ret;
   let SchedRW = !listconcat(VDX.SchedRW, VDY.SchedRW);

diff  --git a/llvm/test/CodeGen/AMDGPU/vopd-combine.mir b/llvm/test/CodeGen/AMDGPU/vopd-combine.mir
index 47661f4c5953f..33c40b59eaaca 100644
--- a/llvm/test/CodeGen/AMDGPU/vopd-combine.mir
+++ b/llvm/test/CodeGen/AMDGPU/vopd-combine.mir
@@ -68,7 +68,7 @@ body:             |
     ; PAIR-NEXT: $vgpr0 = IMPLICIT_DEF
     ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
     ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF
-    ; PAIR-NEXT: $vgpr5, $vgpr2 = V_DUAL_FMAMK_F32_X_FMAC_F32_e32 killed $vgpr0, 10, killed $vgpr3, killed $vgpr1, $vgpr1, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+    ; PAIR-NEXT: $vgpr5, $vgpr2 = V_DUAL_FMAMK_F32_X_FMAC_F32_e32 killed $vgpr0, 10, killed $vgpr3, killed $vgpr1, $vgpr1, killed $vgpr2, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec
     $vgpr0 = IMPLICIT_DEF
     $vgpr1 = IMPLICIT_DEF
     $vgpr2 = IMPLICIT_DEF
@@ -133,7 +133,7 @@ body:             |
     ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF
     ; PAIR-NEXT: $sgpr20 = IMPLICIT_DEF
     ; PAIR-NEXT: $vgpr4 = V_FMAMK_F32 $sgpr20, 12345, $vgpr3, implicit $mode, implicit $exec
-    ; PAIR-NEXT: $vgpr2, $vgpr5 = V_DUAL_FMAC_F32_e32_X_CNDMASK_B32_e32 $sgpr20, killed $vgpr1, $vgpr0, $vgpr3, implicit $exec, implicit $vcc, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $vcc
+    ; PAIR-NEXT: $vgpr2, $vgpr5 = V_DUAL_FMAC_F32_e32_X_CNDMASK_B32_e32 $sgpr20, killed $vgpr1, killed $vgpr2, $vgpr0, $vgpr3, implicit $exec, implicit $vcc, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $vcc
     ; PAIR-NEXT: $vgpr7 = V_CNDMASK_B32_e32 killed $vgpr0, $vgpr3, implicit $mode, implicit $exec, implicit $vcc
     ; PAIR-NEXT: $vgpr6 = V_ADD_F32_e32 $sgpr20, $vgpr3, implicit $mode, implicit $exec
     ; PAIR-NEXT: $vgpr9 = V_CNDMASK_B32_e32 killed $sgpr20, killed $vgpr3, implicit $mode, implicit $exec, implicit $vcc
@@ -246,7 +246,7 @@ body:             |
     ; PAIR-NEXT: $vgpr0 = IMPLICIT_DEF
     ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
     ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF
-    ; PAIR-NEXT: $vgpr5, $vgpr2 = V_DUAL_FMAMK_F32_X_FMAC_F32_e32 killed $vgpr0, 100, killed $vgpr3, 4, killed $vgpr1, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+    ; PAIR-NEXT: $vgpr5, $vgpr2 = V_DUAL_FMAMK_F32_X_FMAC_F32_e32 killed $vgpr0, 100, killed $vgpr3, 4, killed $vgpr1, killed $vgpr2, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec
     $vgpr0 = IMPLICIT_DEF
     $vgpr1 = IMPLICIT_DEF
     $vgpr2 = IMPLICIT_DEF
@@ -276,7 +276,7 @@ body:             |
     ; PAIR-NEXT: $vgpr0 = IMPLICIT_DEF
     ; PAIR-NEXT: $vgpr1 = IMPLICIT_DEF
     ; PAIR-NEXT: $vgpr3 = IMPLICIT_DEF
-    ; PAIR-NEXT: $vgpr5, $vgpr2 = V_DUAL_FMAMK_F32_X_FMAC_F32_e32 killed $vgpr0, 100, killed $vgpr3, 100, killed $vgpr1, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+    ; PAIR-NEXT: $vgpr5, $vgpr2 = V_DUAL_FMAMK_F32_X_FMAC_F32_e32 killed $vgpr0, 100, killed $vgpr3, 100, killed $vgpr1, killed $vgpr2, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec
     $vgpr0 = IMPLICIT_DEF
     $vgpr1 = IMPLICIT_DEF
     $vgpr2 = IMPLICIT_DEF
@@ -447,7 +447,7 @@ body:             |
     ; PAIR-NEXT: $vgpr2 = V_FMAC_F32_e32 10, $vgpr1, killed $vgpr2, implicit $mode, implicit $exec
     ; PAIR-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
     ; PAIR-NEXT: $vgpr4, $vgpr29 = V_DUAL_SUB_F32_e32_X_CNDMASK_B32_e32 $vgpr1, $vgpr1, $vgpr0, $vgpr3, implicit $exec, implicit $vcc, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $vcc
-    ; PAIR-NEXT: $vgpr19, $vgpr20 = V_DUAL_CNDMASK_B32_e32_X_FMAC_F32_e32 $vgpr0, $vgpr3, 10, $vgpr1, implicit $exec, implicit $mode, implicit $vcc, implicit $mode, implicit $exec, implicit $vcc, implicit $mode, implicit $exec
+    ; PAIR-NEXT: $vgpr19, $vgpr20 = V_DUAL_CNDMASK_B32_e32_X_FMAC_F32_e32 $vgpr0, $vgpr3, 10, $vgpr1, killed $vgpr20, implicit $exec, implicit $mode, implicit $vcc, implicit $mode, implicit $exec, implicit $vcc, implicit $mode, implicit $exec
     ; PAIR-NEXT: $vgpr15 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec, implicit $vcc
     ; PAIR-NEXT: $vgpr10, $vgpr17 = V_DUAL_CNDMASK_B32_e32_X_MUL_F32_e32 $vgpr1, $vgpr2, $vgpr0, $vgpr0, implicit $exec, implicit $mode, implicit $vcc, implicit $mode, implicit $exec, implicit $vcc, implicit $mode, implicit $exec
     ; PAIR-NEXT: $vgpr11, $vgpr12 = V_DUAL_CNDMASK_B32_e32_X_ADD_F32_e32 $vgpr0, $vgpr3, $vgpr1, $vgpr1, implicit $exec, implicit $mode, implicit $vcc, implicit $mode, implicit $exec, implicit $vcc, implicit $mode, implicit $exec

diff  --git a/llvm/test/CodeGen/AMDGPU/vopd-fmac-delay.mir b/llvm/test/CodeGen/AMDGPU/vopd-fmac-delay.mir
deleted file mode 100644
index e1f1d45bc7ea3..0000000000000
--- a/llvm/test/CodeGen/AMDGPU/vopd-fmac-delay.mir
+++ /dev/null
@@ -1,28 +0,0 @@
-# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs -run-pass=gcn-create-vopd,amdgpu-insert-delay-alu %s -o - | FileCheck %s
-
-# FIXME: Second VOPD pair reads vgpr0 and vgpr1 written by first pair, so there
-# should be a delay.
----
-name: vopd_fmac_fmac
-tracksRegLiveness: true
-body: |
-  bb.0:
-    ; CHECK-LABEL: name: vopd_fmac_fmac
-    ; CHECK: $vgpr0 = IMPLICIT_DEF
-    ; CHECK-NEXT: $vgpr1 = IMPLICIT_DEF
-    ; CHECK-NEXT: $vgpr2 = IMPLICIT_DEF
-    ; CHECK-NEXT: $vgpr3 = IMPLICIT_DEF
-    ; CHECK-NEXT: $vgpr4 = IMPLICIT_DEF
-    ; CHECK-NEXT: $vgpr0, $vgpr1 = V_DUAL_FMAC_F32_e32_X_FMAC_F32_e32 $vgpr2, $vgpr3, $vgpr3, $vgpr4, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec
-    ; CHECK-NEXT: $vgpr0, $vgpr1 = V_DUAL_FMAC_F32_e32_X_FMAC_F32_e32 $vgpr2, $vgpr3, $vgpr3, $vgpr4, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec
-    $vgpr0 = IMPLICIT_DEF
-    $vgpr1 = IMPLICIT_DEF
-    $vgpr2 = IMPLICIT_DEF
-    $vgpr3 = IMPLICIT_DEF
-    $vgpr4 = IMPLICIT_DEF
-    $vgpr0 = V_FMAC_F32_e32 $vgpr2, $vgpr3, $vgpr0, implicit $mode, implicit $exec
-    $vgpr1 = V_FMAC_F32_e32 $vgpr3, $vgpr4, $vgpr1, implicit $mode, implicit $exec
-    $vgpr0 = V_FMAC_F32_e32 $vgpr2, $vgpr3, $vgpr0, implicit $mode, implicit $exec
-    $vgpr1 = V_FMAC_F32_e32 $vgpr3, $vgpr4, $vgpr1, implicit $mode, implicit $exec
-...

diff  --git a/llvm/test/CodeGen/AMDGPU/vopd-src2acc-delay.mir b/llvm/test/CodeGen/AMDGPU/vopd-src2acc-delay.mir
new file mode 100644
index 0000000000000..9b08ef78d1b01
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/vopd-src2acc-delay.mir
@@ -0,0 +1,51 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs -run-pass=gcn-create-vopd,amdgpu-insert-delay-alu %s -o - | FileCheck %s
+
+---
+name: vopd_fmac_fmac
+tracksRegLiveness: true
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: vopd_fmac_fmac
+    ; CHECK: $vgpr0 = IMPLICIT_DEF
+    ; CHECK-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; CHECK-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; CHECK-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; CHECK-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; CHECK-NEXT: $vgpr0, $vgpr1 = V_DUAL_FMAC_F32_e32_X_FMAC_F32_e32 $vgpr2, $vgpr3, $vgpr0, $vgpr3, $vgpr4, $vgpr1, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+    ; CHECK-NEXT: S_DELAY_ALU 1
+    ; CHECK-NEXT: $vgpr0, $vgpr1 = V_DUAL_FMAC_F32_e32_X_FMAC_F32_e32 $vgpr2, $vgpr3, $vgpr0, $vgpr3, $vgpr4, $vgpr1, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr2 = IMPLICIT_DEF
+    $vgpr3 = IMPLICIT_DEF
+    $vgpr4 = IMPLICIT_DEF
+    $vgpr0 = V_FMAC_F32_e32 $vgpr2, $vgpr3, $vgpr0, implicit $mode, implicit $exec
+    $vgpr1 = V_FMAC_F32_e32 $vgpr3, $vgpr4, $vgpr1, implicit $mode, implicit $exec
+    $vgpr0 = V_FMAC_F32_e32 $vgpr2, $vgpr3, $vgpr0, implicit $mode, implicit $exec
+    $vgpr1 = V_FMAC_F32_e32 $vgpr3, $vgpr4, $vgpr1, implicit $mode, implicit $exec
+...
+---
+name: vopd_dot2c_dot2c
+tracksRegLiveness: true
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: vopd_dot2c_dot2c
+    ; CHECK: $vgpr0 = IMPLICIT_DEF
+    ; CHECK-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; CHECK-NEXT: $vgpr2 = IMPLICIT_DEF
+    ; CHECK-NEXT: $vgpr3 = IMPLICIT_DEF
+    ; CHECK-NEXT: $vgpr4 = IMPLICIT_DEF
+    ; CHECK-NEXT: $vgpr0, $vgpr1 = V_DUAL_DOT2C_F32_F16_e32_X_DOT2C_F32_F16_e32 $vgpr2, $vgpr3, $vgpr0, $vgpr3, $vgpr4, $vgpr1, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+    ; CHECK-NEXT: S_DELAY_ALU 1
+    ; CHECK-NEXT: $vgpr0, $vgpr1 = V_DUAL_DOT2C_F32_F16_e32_X_DOT2C_F32_F16_e32 $vgpr2, $vgpr3, $vgpr0, $vgpr3, $vgpr4, $vgpr1, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    $vgpr2 = IMPLICIT_DEF
+    $vgpr3 = IMPLICIT_DEF
+    $vgpr4 = IMPLICIT_DEF
+    $vgpr0 = V_DOT2C_F32_F16_e32 $vgpr2, $vgpr3, $vgpr0, implicit $mode, implicit $exec
+    $vgpr1 = V_DOT2C_F32_F16_e32 $vgpr3, $vgpr4, $vgpr1, implicit $mode, implicit $exec
+    $vgpr0 = V_DOT2C_F32_F16_e32 $vgpr2, $vgpr3, $vgpr0, implicit $mode, implicit $exec
+    $vgpr1 = V_DOT2C_F32_F16_e32 $vgpr3, $vgpr4, $vgpr1, implicit $mode, implicit $exec
+...

diff  --git a/llvm/test/MC/AMDGPU/gfx11_asm_vopd_errs.s b/llvm/test/MC/AMDGPU/gfx11_asm_vopd_errs.s
index f1682c5636ef0..3866cb458dbb0 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vopd_errs.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vopd_errs.s
@@ -266,4 +266,4 @@ v_dual_fmac_f32     v7, v1, v2                   :: v_dual_fmamk_f32      v6, v2
 v_dual_fmamk_f32    v6, v1, 0xaf123456, v3       :: v_dual_fmac_f32       v5, v2, v3
 // GFX11: error: src2 operands must use different VGPR banks
 // GFX11-NEXT:{{^}}v_dual_fmamk_f32    v6, v1, 0xaf123456, v3       :: v_dual_fmac_f32       v5, v2, v3
-// GFX11-NEXT:{{^}}                                        ^
+// GFX11-NEXT:{{^}}                                                                          ^


        


More information about the llvm-commits mailing list