[llvm] edf2d0a - [AMDGPU] Introduce a pseudo mnemonic for S_DELAY_ALU in MIR. (#96004)

via llvm-commits llvm-commits at lists.llvm.org
Thu Jun 20 12:59:13 PDT 2024


Author: Michael Bedy
Date: 2024-06-20T15:59:08-04:00
New Revision: edf2d0a95e9f0bd1befd8ff7686cb0e07af14fc3

URL: https://github.com/llvm/llvm-project/commit/edf2d0a95e9f0bd1befd8ff7686cb0e07af14fc3
DIFF: https://github.com/llvm/llvm-project/commit/edf2d0a95e9f0bd1befd8ff7686cb0e07af14fc3.diff

LOG: [AMDGPU] Introduce a pseudo mnemonic for S_DELAY_ALU in MIR. (#96004)

Added: 
    llvm/test/CodeGen/AMDGPU/insert-delay-alu-literal.mir
    llvm/test/CodeGen/AMDGPU/insert-delay-alu-parse.mir

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp
    llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.h
    llvm/test/CodeGen/AMDGPU/vopd-src2acc-delay.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp
index 6ec4178053b20..11f0cba47afde 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp
@@ -17,6 +17,157 @@
 
 using namespace llvm;
 
+/// Print the immediate operand \p Imm of \p MI to \p OS. An S_DELAY_ALU
+/// immediate is printed as a pseudo mnemonic by printSDelayAluImm; for every
+/// other opcode the call is forwarded to MIRFormatter::printImm.
+void AMDGPUMIRFormatter::printImm(raw_ostream &OS, const MachineInstr &MI,
+                                  std::optional<unsigned int> OpIdx,
+                                  int64_t Imm) const {
+  if (MI.getOpcode() != AMDGPU::S_DELAY_ALU) {
+    MIRFormatter::printImm(OS, MI, OpIdx, Imm);
+    return;
+  }
+  assert(OpIdx == 0); // The delay immediate is expected at operand index 0.
+  printSDelayAluImm(Imm, OS);
+}
+
+/// Implement target specific parsing of immediate mnemonics. The mnemonic is
+/// a string with a leading dot.
+///
+/// Only S_DELAY_ALU has a mnemonic form here: its text \p Src is decoded into
+/// \p Imm by parseSDelayAluImmMnemonic, whose result is returned as is. For
+/// any other \p OpCode this returns true and leaves \p Imm untouched.
+bool AMDGPUMIRFormatter::parseImmMnemonic(
+    const unsigned OpCode, const unsigned OpIdx, StringRef Src, int64_t &Imm,
+    ErrorCallbackType ErrorCallback) const {
+  if (OpCode != AMDGPU::S_DELAY_ALU)
+    return true; // Don't know what this is
+
+  // The helper takes Src and ErrorCallback by reference; both are this
+  // function's own by-value copies, so the caller's objects are unaffected.
+  return parseSDelayAluImmMnemonic(OpIdx, Imm, Src, ErrorCallback);
+}
+
+/// Print the s_delay_alu immediate \p Imm to \p OS as the pseudo mnemonic
+///   .id0_<dep>[_skip_<count>_id1_<dep>]
+/// The bracketed suffix is left out when the skip field and the second
+/// dependency field are both zero. Fields are read from \p Imm as: bits [3:0]
+/// first dependency, bits [6:4] skip, bits [10:7] second dependency.
+void AMDGPUMIRFormatter::printSDelayAluImm(int64_t Imm,
+                                           llvm::raw_ostream &OS) const {
+  const uint64_t FirstDep = Imm & 0xF;
+  const uint64_t SkipField = (Imm >> 4) & 0x7;
+  const uint64_t SecondDep = (Imm >> 7) & 0xF;
+
+  // Name one dependency field: 0 is NONE, 1-4 are VALU_DEP_1..4, 5-7 are
+  // TRANS32_DEP_1..3, and anything from 8 up is SALU_CYCLE_<field - 8>.
+  auto PrintDep = [&OS](uint64_t Field) {
+    if (Field >= 8)
+      OS << "SALU_CYCLE_" << Field - 8;
+    else if (Field >= 5)
+      OS << "TRANS32_DEP_" << Field - 4;
+    else if (Field >= 1)
+      OS << "VALU_DEP_" << Field;
+    else
+      OS << "NONE";
+  };
+
+  OS << ".id0_";
+  PrintDep(FirstDep);
+
+  // Short form: nothing more to print when the second slot is "same"/"none".
+  const bool HasSecondSlot = SkipField != 0 || SecondDep != 0;
+  if (!HasSecondSlot)
+    return;
+
+  // Skip field: 0 is SAME, 1 is NEXT, and n >= 2 is SKIP_<n - 1>.
+  OS << "_skip_";
+  if (SkipField >= 2)
+    OS << "SKIP_" << SkipField - 1;
+  else
+    OS << (SkipField == 0 ? "SAME" : "NEXT");
+
+  OS << "_id1_";
+  PrintDep(SecondDep);
+}
+
+/// Parse the s_delay_alu pseudo mnemonic \p Src, of the form
+///   .id0_<dep>[_skip_<count>_id1_<dep>]
+/// into \p Imm: bits [3:0] first dependency, bits [6:4] skip, bits [10:7]
+/// second dependency (the last two are zero when the suffix is absent).
+/// <dep> is NONE, VALU_DEP_<n>, TRANS32_DEP_<n> or SALU_CYCLE_<n>; <count> is
+/// SAME, NEXT or SKIP_<n>. \p Src is consumed while parsing. Returns false on
+/// success, otherwise whatever \p ErrorCallback returns for the bad position.
+bool AMDGPUMIRFormatter::parseSDelayAluImmMnemonic(
+    const unsigned int OpIdx, int64_t &Imm, llvm::StringRef &Src,
+    llvm::MIRFormatter::ErrorCallbackType &ErrorCallback) const {
+  assert(OpIdx == 0);
+
+  Imm = 0;
+  if (!Src.consume_front(".id0_"))
+    return ErrorCallback(Src.begin(), "Expected .id0_");
+
+  // Parse a decimal count and bias it by Offset. Yields -1 if no integer is
+  // present or the result does not fit the 4-bit dependency field, so a bad
+  // count cannot spill into the neighbouring fields of the immediate.
+  auto ParseBiased = [](StringRef &Str, int64_t Offset) -> int64_t {
+    int64_t Count;
+    if (Str.consumeInteger(10, Count) || Count < 0 || Count > 0xF - Offset)
+      return -1;
+    return Count + Offset;
+  };
+
+  // Decode one <dep> name into its field value, or -1 if it is not valid.
+  auto DecodeDelay = [&](StringRef &Str) -> int64_t {
+    if (Str.consume_front("NONE"))
+      return 0;
+    if (Str.consume_front("VALU_DEP_"))
+      return ParseBiased(Str, 0);
+    if (Str.consume_front("TRANS32_DEP_"))
+      return ParseBiased(Str, 4);
+    if (Str.consume_front("SALU_CYCLE_"))
+      return ParseBiased(Str, 8);
+    return -1;
+  };
+
+  const int64_t Delay0 = DecodeDelay(Src);
+  if (Delay0 == -1)
+    return ErrorCallback(Src.begin(), "Could not decode delay0");
+
+  // Set the Imm so far, so that the early return has the correct value.
+  Imm = Delay0;
+
+  // At the end of the string the second instruction is "same" and "none".
+  if (Src.empty())
+    return false;
+
+  if (!Src.consume_front("_skip_"))
+    return ErrorCallback(Src.begin(), "Expected _skip_");
+
+  int64_t Skip = 0;
+  if (Src.consume_front("NEXT")) {
+    Skip = 1;
+  } else if (Src.consume_front("SKIP_")) {
+    // SKIP_<n> encodes as n + 1, which must fit the 3-bit skip field.
+    if (Src.consumeInteger(10, Skip) || Skip < 0 || Skip > 6)
+      return ErrorCallback(Src.begin(), "Expected integer Skip value");
+    Skip += 1;
+  } else if (!Src.consume_front("SAME")) {
+    // Stop here rather than carrying on with an unknown skip keyword.
+    return ErrorCallback(Src.begin(), "Unexpected Skip Value");
+  }
+
+  if (!Src.consume_front("_id1_"))
+    return ErrorCallback(Src.begin(), "Expected _id1_");
+
+  const int64_t Delay1 = DecodeDelay(Src);
+  if (Delay1 == -1)
+    return ErrorCallback(Src.begin(), "Could not decode delay1");
+
+  Imm = Delay0 | (Skip << 4) | (Delay1 << 7);
+  return false;
+}
+
 bool AMDGPUMIRFormatter::parseCustomPseudoSourceValue(
     StringRef Src, MachineFunction &MF, PerFunctionMIParsingState &PFS,
     const PseudoSourceValue *&PSV, ErrorCallbackType ErrorCallback) const {

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.h b/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.h
index 98b5031071cf4..c5c9473752529 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.h
@@ -28,12 +28,35 @@ class AMDGPUMIRFormatter final : public MIRFormatter {
   AMDGPUMIRFormatter() = default;
   virtual ~AMDGPUMIRFormatter() = default;
 
+  /// Implement target specific printing for machine operand immediate value, so
+  /// that we can have more meaningful mnemonic than a 64-bit integer. Passing
+  /// std::nullopt to OpIdx means the index is unknown.
+  void printImm(raw_ostream &OS, const MachineInstr &MI,
+                std::optional<unsigned> OpIdx,
+                int64_t Imm) const override;
+
+  /// Implement target specific parsing of immediate mnemonics. The mnemonic
+  /// is a string with a leading dot; the decoded value is stored in Imm.
+  bool parseImmMnemonic(const unsigned OpCode, const unsigned OpIdx,
+                        StringRef Src, int64_t &Imm,
+                        ErrorCallbackType ErrorCallback) const override;
+
   /// Implement target specific parsing of target custom pseudo source value.
   bool
   parseCustomPseudoSourceValue(StringRef Src, MachineFunction &MF,
                                PerFunctionMIParsingState &PFS,
                                const PseudoSourceValue *&PSV,
                                ErrorCallbackType ErrorCallback) const override;
+
+private:
+  /// Print the s_delay_alu immediate \p Imm to \p OS as a pseudo mnemonic.
+  void printSDelayAluImm(int64_t Imm, llvm::raw_ostream &OS) const;
+
+  /// Parse the s_delay_alu pseudo mnemonic \p Src into the immediate \p Imm.
+  bool parseSDelayAluImmMnemonic(
+      const unsigned int OpIdx, int64_t &Imm, llvm::StringRef &Src,
+      llvm::MIRFormatter::ErrorCallbackType &ErrorCallback) const;
+
 };
 
 } // end namespace llvm

diff  --git a/llvm/test/CodeGen/AMDGPU/insert-delay-alu-literal.mir b/llvm/test/CodeGen/AMDGPU/insert-delay-alu-literal.mir
new file mode 100644
index 0000000000000..18bc442ae98e0
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/insert-delay-alu-literal.mir
@@ -0,0 +1,192 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs -run-pass=amdgpu-insert-delay-alu %s -o - | FileCheck %s
+
+---
+name: valu_dep_1
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: valu_dep_1
+    ; CHECK: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    ; CHECK-NEXT: S_DELAY_ALU .id0_VALU_DEP_1
+    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: valu_dep_2
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: valu_dep_2
+    ; CHECK: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+    ; CHECK-NEXT: S_DELAY_ALU .id0_VALU_DEP_2
+    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: valu_dep_3
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: valu_dep_3
+    ; CHECK: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+    ; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
+    ; CHECK-NEXT: S_DELAY_ALU .id0_VALU_DEP_3
+    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+    $vgpr2 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: valu_dep_4
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: valu_dep_4
+    ; CHECK: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+    ; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
+    ; CHECK-NEXT: $vgpr3 = V_ADD_U32_e32 $vgpr3, $vgpr3, implicit $exec
+    ; CHECK-NEXT: S_DELAY_ALU .id0_VALU_DEP_4
+    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+    $vgpr2 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
+    $vgpr3 = V_ADD_U32_e32 $vgpr3, $vgpr3, implicit $exec
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: trans32_dep_1
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: trans32_dep_1
+    ; CHECK: $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
+    ; CHECK-NEXT: S_DELAY_ALU .id0_TRANS32_DEP_1
+    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: trans32_dep_2
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: trans32_dep_2
+    ; CHECK: $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
+    ; CHECK-NEXT: $vgpr1 = V_EXP_F32_e32 $vgpr1, implicit $exec, implicit $mode
+    ; CHECK-NEXT: S_DELAY_ALU .id0_TRANS32_DEP_2
+    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
+    $vgpr1 = V_EXP_F32_e32 $vgpr1, implicit $exec, implicit $mode
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: trans32_dep_3
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: trans32_dep_3
+    ; CHECK: $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
+    ; CHECK-NEXT: $vgpr1 = V_EXP_F32_e32 $vgpr1, implicit $exec, implicit $mode
+    ; CHECK-NEXT: $vgpr2 = V_EXP_F32_e32 $vgpr2, implicit $exec, implicit $mode
+    ; CHECK-NEXT: S_DELAY_ALU .id0_TRANS32_DEP_3
+    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
+    $vgpr1 = V_EXP_F32_e32 $vgpr1, implicit $exec, implicit $mode
+    $vgpr2 = V_EXP_F32_e32 $vgpr2, implicit $exec, implicit $mode
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: salu_cycle_1
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: salu_cycle_1
+    ; CHECK: $sgpr0 = S_MOV_B32 0
+    ; CHECK-NEXT: S_DELAY_ALU .id0_SALU_CYCLE_1
+    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $sgpr0, $vgpr0, implicit $exec
+    $sgpr0 = S_MOV_B32 0
+    $vgpr0 = V_ADD_U32_e32 $sgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: valu_dep_1_same_trans32_dep_1
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: valu_dep_1_same_trans32_dep_1
+    ; CHECK: $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
+    ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+    ; CHECK-NEXT: S_DELAY_ALU .id0_TRANS32_DEP_1_skip_SAME_id1_VALU_DEP_1
+    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+    $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
+    $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+...
+
+---
+name: valu_dep_1_same_salu_cycle_1
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: valu_dep_1_same_salu_cycle_1
+    ; CHECK: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    ; CHECK-NEXT: $sgpr0 = S_MOV_B32 0
+    ; CHECK-NEXT: S_DELAY_ALU .id0_VALU_DEP_1_skip_SAME_id1_SALU_CYCLE_1
+    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $sgpr0, $vgpr0, implicit $exec
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $sgpr0 = S_MOV_B32 0
+    $vgpr0 = V_ADD_U32_e32 $sgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: valu_dep_1_next_valu_dep_1
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: valu_dep_1_next_valu_dep_1
+    ; CHECK: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    ; CHECK-NEXT: S_DELAY_ALU .id0_VALU_DEP_1_skip_NEXT_id1_VALU_DEP_1
+    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: valu_dep_2_next_valu_dep_2
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: valu_dep_2_next_valu_dep_2
+    ; CHECK: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+    ; CHECK-NEXT: S_DELAY_ALU .id0_VALU_DEP_2_skip_NEXT_id1_VALU_DEP_2
+    ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+...
+
+---
+name: valu_dep_2_skip_valu_dep_2
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: valu_dep_2_skip_valu_dep_2
+    ; CHECK: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    ; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+    ; CHECK-NEXT: S_DELAY_ALU .id0_VALU_DEP_2_skip_SKIP_1_id1_VALU_DEP_2
+    ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+    ; CHECK-NEXT: $vgpr4 = V_ADD_U32_e32 $vgpr3, $vgpr3, implicit $exec
+    ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr2 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+    $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+    $vgpr4 = V_ADD_U32_e32 $vgpr3, $vgpr3, implicit $exec
+    $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+...

diff  --git a/llvm/test/CodeGen/AMDGPU/insert-delay-alu-parse.mir b/llvm/test/CodeGen/AMDGPU/insert-delay-alu-parse.mir
new file mode 100644
index 0000000000000..af2e6001502f6
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/insert-delay-alu-parse.mir
@@ -0,0 +1,217 @@
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs -start-after=amdgpu-insert-delay-alu %s -o - | FileCheck %s
+
+---
+name: valu_dep_1
+body: |
+  bb.0:
+    ; CHECK-LABEL: {{^}}valu_dep_1:
+    ; CHECK: %bb.0:
+    ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
+    ; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
+    ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    S_DELAY_ALU .id0_VALU_DEP_1
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: valu_dep_2
+body: |
+  bb.0:
+    ; CHECK-LABEL: {{^}}valu_dep_2:
+    ; CHECK: %bb.0:
+    ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
+    ; CHECK-NEXT: v_add_nc_u32_e32 v1, v1, v1
+    ; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_2)
+    ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+    S_DELAY_ALU .id0_VALU_DEP_2
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: valu_dep_3
+body: |
+  bb.0:
+    ; CHECK-LABEL: {{^}}valu_dep_3:
+    ; CHECK: %bb.0:
+    ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
+    ; CHECK-NEXT: v_add_nc_u32_e32 v1, v1, v1
+    ; CHECK-NEXT: v_add_nc_u32_e32 v2, v2, v2
+    ; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_3)
+    ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+    $vgpr2 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
+    S_DELAY_ALU .id0_VALU_DEP_3
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: valu_dep_4
+body: |
+  bb.0:
+    ; CHECK-LABEL: {{^}}valu_dep_4:
+    ; CHECK: %bb.0:
+    ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
+    ; CHECK-NEXT: v_add_nc_u32_e32 v1, v1, v1
+    ; CHECK-NEXT: v_add_nc_u32_e32 v2, v2, v2
+    ; CHECK-NEXT: v_add_nc_u32_e32 v3, v3, v3
+    ; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_4)
+    ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+    $vgpr2 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
+    $vgpr3 = V_ADD_U32_e32 $vgpr3, $vgpr3, implicit $exec
+    S_DELAY_ALU .id0_VALU_DEP_4
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: trans32_dep_1
+body: |
+  bb.0:
+    ; CHECK-LABEL: {{^}}trans32_dep_1:
+    ; CHECK: %bb.0:
+    ; CHECK-NEXT: v_exp_f32_e32 v0, v0
+    ; CHECK-NEXT: s_delay_alu instid0(TRANS32_DEP_1)
+    ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
+    $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
+    S_DELAY_ALU .id0_TRANS32_DEP_1
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: trans32_dep_2
+body: |
+  bb.0:
+    ; CHECK-LABEL: {{^}}trans32_dep_2:
+    ; CHECK: %bb.0:
+    ; CHECK-NEXT: v_exp_f32_e32 v0, v0
+    ; CHECK-NEXT: v_exp_f32_e32 v1, v1
+    ; CHECK-NEXT: s_delay_alu instid0(TRANS32_DEP_2)
+    ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
+    $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
+    $vgpr1 = V_EXP_F32_e32 $vgpr1, implicit $exec, implicit $mode
+    S_DELAY_ALU .id0_TRANS32_DEP_2
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: trans32_dep_3
+body: |
+  bb.0:
+    ; CHECK-LABEL: {{^}}trans32_dep_3:
+    ; CHECK: %bb.0:
+    ; CHECK-NEXT: v_exp_f32_e32 v0, v0
+    ; CHECK-NEXT: v_exp_f32_e32 v1, v1
+    ; CHECK-NEXT: v_exp_f32_e32 v2, v2
+    ; CHECK-NEXT: s_delay_alu instid0(TRANS32_DEP_3)
+    ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
+    $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
+    $vgpr1 = V_EXP_F32_e32 $vgpr1, implicit $exec, implicit $mode
+    $vgpr2 = V_EXP_F32_e32 $vgpr2, implicit $exec, implicit $mode
+    S_DELAY_ALU .id0_TRANS32_DEP_3
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: salu_cycle_1
+body: |
+  bb.0:
+    ; CHECK-LABEL: {{^}}salu_cycle_1:
+    ; CHECK: %bb.0:
+    ; CHECK-NEXT: s_mov_b32 s0, 0
+    ; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+    ; CHECK-NEXT: v_add_nc_u32_e32 v0, s0, v0
+    $sgpr0 = S_MOV_B32 0
+    S_DELAY_ALU .id0_SALU_CYCLE_1
+    $vgpr0 = V_ADD_U32_e32 $sgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: valu_dep_1_same_trans32_dep_1
+body: |
+  bb.0:
+    ; CHECK-LABEL: {{^}}valu_dep_1_same_trans32_dep_1:
+    ; CHECK: %bb.0:
+    ; CHECK-NEXT: v_exp_f32_e32 v0, v0
+    ; CHECK-NEXT: v_add_nc_u32_e32 v1, v1, v1
+    ; CHECK-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instid1(VALU_DEP_1)
+    ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v1
+    $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
+    $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+    S_DELAY_ALU .id0_TRANS32_DEP_1_skip_SAME_id1_VALU_DEP_1
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+...
+
+---
+name: valu_dep_1_same_salu_cycle_1
+body: |
+  bb.0:
+    ; CHECK-LABEL: {{^}}valu_dep_1_same_salu_cycle_1:
+    ; CHECK: %bb.0:
+    ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
+    ; CHECK-NEXT: s_mov_b32 s0, 0
+    ; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
+    ; CHECK-NEXT: v_add_nc_u32_e32 v0, s0, v0
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $sgpr0 = S_MOV_B32 0
+    S_DELAY_ALU .id0_VALU_DEP_1_skip_SAME_id1_SALU_CYCLE_1
+    $vgpr0 = V_ADD_U32_e32 $sgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: valu_dep_1_next_valu_dep_1
+body: |
+  bb.0:
+    ; CHECK-LABEL: {{^}}valu_dep_1_next_valu_dep_1:
+    ; CHECK: %bb.0:
+    ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
+    ; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+    ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
+    ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    S_DELAY_ALU .id0_VALU_DEP_1_skip_NEXT_id1_VALU_DEP_1
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: valu_dep_2_next_valu_dep_2
+body: |
+  bb.0:
+    ; CHECK-LABEL: {{^}}valu_dep_2_next_valu_dep_2:
+    ; CHECK: %bb.0:
+    ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
+    ; CHECK-NEXT: v_add_nc_u32_e32 v1, v1, v1
+    ; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+    ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
+    ; CHECK-NEXT: v_add_nc_u32_e32 v1, v1, v1
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+    S_DELAY_ALU .id0_VALU_DEP_2_skip_NEXT_id1_VALU_DEP_2
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+...
+
+---
+name: valu_dep_2_skip_valu_dep_2
+body: |
+  bb.0:
+    ; CHECK-LABEL: {{^}}valu_dep_2_skip_valu_dep_2:
+    ; CHECK: %bb.0:
+    ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
+    ; CHECK-NEXT: v_add_nc_u32_e32 v2, v1, v1
+    ; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+    ; CHECK-NEXT: v_add_nc_u32_e32 v1, v0, v1
+    ; CHECK-NEXT: v_add_nc_u32_e32 v4, v3, v3
+    ; CHECK-NEXT: v_add_nc_u32_e32 v1, v1, v1
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr2 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+    S_DELAY_ALU .id0_VALU_DEP_2_skip_SKIP_1_id1_VALU_DEP_2
+    $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+    $vgpr4 = V_ADD_U32_e32 $vgpr3, $vgpr3, implicit $exec
+    $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+...

diff  --git a/llvm/test/CodeGen/AMDGPU/vopd-src2acc-delay.mir b/llvm/test/CodeGen/AMDGPU/vopd-src2acc-delay.mir
index 70b95b06e402c..b3bf9081b0d46 100644
--- a/llvm/test/CodeGen/AMDGPU/vopd-src2acc-delay.mir
+++ b/llvm/test/CodeGen/AMDGPU/vopd-src2acc-delay.mir
@@ -13,7 +13,7 @@ body: |
     ; CHECK-NEXT: $vgpr3 = IMPLICIT_DEF
     ; CHECK-NEXT: $vgpr4 = IMPLICIT_DEF
     ; CHECK-NEXT: $vgpr0, $vgpr1 = V_DUAL_FMAC_F32_e32_X_FMAC_F32_e32_gfx11 $vgpr2, $vgpr3, $vgpr0, $vgpr3, $vgpr4, $vgpr1, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
-    ; CHECK-NEXT: S_DELAY_ALU 1
+    ; CHECK-NEXT: S_DELAY_ALU .id0_VALU_DEP_1
     ; CHECK-NEXT: $vgpr0, $vgpr1 = V_DUAL_FMAC_F32_e32_X_FMAC_F32_e32_gfx11 $vgpr2, $vgpr3, $vgpr0, $vgpr3, $vgpr4, $vgpr1, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
     $vgpr0 = IMPLICIT_DEF
     $vgpr1 = IMPLICIT_DEF
@@ -37,7 +37,7 @@ body: |
     ; CHECK-NEXT: $vgpr3 = IMPLICIT_DEF
     ; CHECK-NEXT: $vgpr4 = IMPLICIT_DEF
     ; CHECK-NEXT: $vgpr0, $vgpr1 = V_DUAL_DOT2C_F32_F16_e32_X_DOT2C_F32_F16_e32_gfx11 $vgpr2, $vgpr3, $vgpr0, $vgpr3, $vgpr4, $vgpr1, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
-    ; CHECK-NEXT: S_DELAY_ALU 1
+    ; CHECK-NEXT: S_DELAY_ALU .id0_VALU_DEP_1
     ; CHECK-NEXT: $vgpr0, $vgpr1 = V_DUAL_DOT2C_F32_F16_e32_X_DOT2C_F32_F16_e32_gfx11 $vgpr2, $vgpr3, $vgpr0, $vgpr3, $vgpr4, $vgpr1, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
     $vgpr0 = IMPLICIT_DEF
     $vgpr1 = IMPLICIT_DEF


        


More information about the llvm-commits mailing list