[llvm] [AMDGPU] Introduce a pseudo mnemonic for S_DELAY_ALU in MIR. (PR #96004)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 18 15:33:15 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Michael Bedy (mjbedy)
<details>
<summary>Changes</summary>
---
Patch is 22.16 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/96004.diff
5 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp (+154)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.h (+23)
- (added) llvm/test/CodeGen/AMDGPU/insert-delay-alu-literal.mir (+175)
- (added) llvm/test/CodeGen/AMDGPU/insert-delay-alu-parse.mir (+198)
- (modified) llvm/test/CodeGen/AMDGPU/vopd-src2acc-delay.mir (+2-2)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp
index 6ec4178053b20..7bca5b3bb9ef6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp
@@ -17,6 +17,160 @@
using namespace llvm;
+/// Print the immediate operand \p Imm of \p MI to \p OS. For S_DELAY_ALU the
+/// immediate is rendered as a mnemonic by printSDelayAluImm (the operand index
+/// is asserted to be 0); every other opcode is forwarded unchanged to
+/// MIRFormatter::printImm.
+void AMDGPUMIRFormatter::printImm(raw_ostream &OS, const MachineInstr &MI,
+                                  std::optional<unsigned int> OpIdx,
+                                  int64_t Imm) const {
+  // Anything that is not S_DELAY_ALU keeps the generic formatting.
+  if (MI.getOpcode() != AMDGPU::S_DELAY_ALU) {
+    MIRFormatter::printImm(OS, MI, OpIdx, Imm);
+    return;
+  }
+
+  assert(OpIdx == 0);
+  printSDelayAluImm(Imm, OS);
+}
+
+/// Target-specific parsing of an immediate mnemonic. \p Src holds the mnemonic
+/// text (a dot-separated string) and the decoded value is written to \p Imm.
+/// Only S_DELAY_ALU is recognized; it is delegated to
+/// parseSDelayAluImmMnemonic. Any other opcode returns true without touching
+/// \p Imm or invoking \p ErrorCallback.
+bool AMDGPUMIRFormatter::parseImmMnemonic(const unsigned OpCode,
+                                          const unsigned OpIdx, StringRef Src,
+                                          int64_t &Imm,
+                                          ErrorCallbackType ErrorCallback) const {
+  if (OpCode == AMDGPU::S_DELAY_ALU)
+    return parseSDelayAluImmMnemonic(OpIdx, Imm, Src, ErrorCallback);
+
+  // No mnemonic is known for this opcode.
+  return true;
+}
+
+/// Write the mnemonic form of the s_delay_alu immediate \p Imm to \p OS:
+///   .id0_<dep>[_skip_<skip>_id1_<dep>]
+/// The fields are read from \p Imm as: bits [3:0] first dependency, bits [6:4]
+/// skip selector, bits [10:7] second dependency. The bracketed suffix is
+/// omitted when both the skip selector and the second dependency are zero.
+void AMDGPUMIRFormatter::printSDelayAluImm(int64_t Imm,
+                                           llvm::raw_ostream &OS) const {
+  const uint64_t FirstDep = Imm & 0xF;
+  const uint64_t SkipSel = (Imm >> 4) & 0x7;
+  const uint64_t SecondDep = (Imm >> 7) & 0xF;
+
+  // Emit the name of one 4-bit dependency field: 0 is NONE, 1-4 are
+  // VALU_DEP_1..4, 5-7 are TRANS32_DEP_1..3 and 8-15 are SALU_CYCLE_0..7.
+  const auto PrintDep = [&OS](uint64_t Dep) {
+    if (Dep >= 8)
+      OS << "SALU_CYCLE_" << Dep - 8;
+    else if (Dep >= 5)
+      OS << "TRANS32_DEP_" << Dep - 4;
+    else if (Dep != 0)
+      OS << "VALU_DEP_" << Dep;
+    else
+      OS << "NONE";
+  };
+
+  OS << ".id0_";
+  PrintDep(FirstDep);
+
+  // A second specification of "same instruction, no dependency" carries no
+  // information, so the short form is enough.
+  if (SkipSel == 0 && SecondDep == 0)
+    return;
+
+  OS << "_skip_";
+  switch (SkipSel) {
+  case 0:
+    OS << "SAME";
+    break;
+  case 1:
+    OS << "NEXT";
+    break;
+  default:
+    // Selector values 2 and above are printed as SKIP_1, SKIP_2, ...
+    OS << "SKIP_" << SkipSel - 1;
+    break;
+  }
+
+  OS << "_id1_";
+  PrintDep(SecondDep);
+}
+
+/// Parse the s_delay_alu pseudo literal in \p Src, of the form
+///   .id0_<dep>[_skip_<skip>_id1_<dep>]
+/// and store the encoded immediate in \p Imm.
+///
+/// <dep> is NONE, VALU_DEP_<n>, TRANS32_DEP_<n> or SALU_CYCLE_<n>; <skip> is
+/// SAME, NEXT or SKIP_<n>. The result is packed as
+///   id0 | (skip << 4) | (id1 << 7).
+///
+/// \p OpIdx is asserted to be 0. \p Src is consumed as it is parsed.
+/// \returns false on success; on a malformed literal, the value returned by
+/// \p ErrorCallback, which is invoked at the current parse position.
+bool AMDGPUMIRFormatter::parseSDelayAluImmMnemonic(
+    const unsigned int OpIdx, int64_t &Imm, llvm::StringRef &Src,
+    llvm::MIRFormatter::ErrorCallbackType &ErrorCallback) const {
+  assert(OpIdx == 0);
+
+  Imm = 0;
+  if (!Src.consume_front(".id0_"))
+    return ErrorCallback(Src.begin(), "Expected .id0_");
+
+  // Consume a decimal integer and return it biased by Offset, or -1 if no
+  // integer could be consumed.
+  auto expect_int = [](StringRef &S, int64_t Offset) -> int64_t {
+    int64_t Dep;
+    if (S.consumeInteger(10, Dep))
+      return -1;
+    return Dep + Offset;
+  };
+
+  // Decode one dependency name into its field value, or -1 on failure.
+  auto decode_delay = [&](StringRef &S) -> int64_t {
+    if (S.consume_front("NONE"))
+      return 0;
+    if (S.consume_front("VALU_DEP_"))
+      return expect_int(S, 0);
+    if (S.consume_front("TRANS32_DEP_"))
+      return expect_int(S, 4);
+    if (S.consume_front("SALU_CYCLE_"))
+      return expect_int(S, 8);
+    return -1;
+  };
+
+  int64_t Delay0 = decode_delay(Src);
+  if (Delay0 == -1)
+    return ErrorCallback(Src.begin(), "Could not decode delay0");
+
+  // Set the Imm so far, so that the early return below has the correct value.
+  Imm = Delay0;
+
+  // If that was the end of the string, the second instruction is "same" and
+  // "none".
+  if (Src.begin() == Src.end())
+    return false;
+
+  if (!Src.consume_front("_skip_"))
+    return ErrorCallback(Src.begin(), "Expected _skip_");
+
+  int64_t Skip = 0;
+  if (Src.consume_front("SAME")) {
+    Skip = 0;
+  } else if (Src.consume_front("NEXT")) {
+    Skip = 1;
+  } else if (Src.consume_front("SKIP_")) {
+    if (Src.consumeInteger(10, Skip))
+      return ErrorCallback(Src.begin(), "Expected integer Skip value");
+    // printSDelayAluImm emits field value N (N >= 2) as SKIP_<N-1>, so
+    // SKIP_<K> must encode as K + 1 for the text to round-trip.
+    Skip += 1;
+  } else {
+    // Stop here: continuing after reporting the error would go on to parse
+    // the rest of a literal that is already known to be malformed.
+    return ErrorCallback(Src.begin(), "Unexpected Skip Value");
+  }
+
+  if (!Src.consume_front("_id1_"))
+    return ErrorCallback(Src.begin(), "Expected _id1_");
+
+  int64_t Delay1 = decode_delay(Src);
+  if (Delay1 == -1)
+    return ErrorCallback(Src.begin(), "Could not decode delay1");
+
+  Imm = Imm | (Skip << 4) | (Delay1 << 7);
+  return false;
+}
+
bool AMDGPUMIRFormatter::parseCustomPseudoSourceValue(
StringRef Src, MachineFunction &MF, PerFunctionMIParsingState &PFS,
const PseudoSourceValue *&PSV, ErrorCallbackType ErrorCallback) const {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.h b/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.h
index 98b5031071cf4..80bb3dfe7a364 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.h
@@ -28,12 +28,35 @@ class AMDGPUMIRFormatter final : public MIRFormatter {
AMDGPUMIRFormatter() = default;
virtual ~AMDGPUMIRFormatter() = default;
+ /// Implement target specific printing for machine operand immediate value, so
+ /// that we can have more meaningful mnemonic than a 64-bit integer. Passing
+ /// None to OpIdx means the index is unknown.
+ virtual void printImm(raw_ostream &OS, const MachineInstr &MI,
+ std::optional<unsigned> OpIdx,
+ int64_t Imm) const override;
+
+ /// Implement target specific parsing of immediate mnemonics. The mnemonic is
+ /// a dot-separated string.
+ virtual bool parseImmMnemonic(const unsigned OpCode, const unsigned OpIdx,
+ StringRef Src, int64_t &Imm,
+ ErrorCallbackType ErrorCallback) const override;
+
/// Implement target specific parsing of target custom pseudo source value.
bool
parseCustomPseudoSourceValue(StringRef Src, MachineFunction &MF,
PerFunctionMIParsingState &PFS,
const PseudoSourceValue *&PSV,
ErrorCallbackType ErrorCallback) const override;
+
+private:
+ /// Print the string to represent s_delay_alu immediate value
+ void printSDelayAluImm(int64_t Imm, llvm::raw_ostream &OS) const;
+
+ /// Parse the immediate pseudo literal for s_delay_alu
+ bool parseSDelayAluImmMnemonic(
+ const unsigned int OpIdx, int64_t &Imm, llvm::StringRef &Src,
+ llvm::MIRFormatter::ErrorCallbackType &ErrorCallback) const;
+
};
} // end namespace llvm
diff --git a/llvm/test/CodeGen/AMDGPU/insert-delay-alu-literal.mir b/llvm/test/CodeGen/AMDGPU/insert-delay-alu-literal.mir
new file mode 100644
index 0000000000000..7788e50ed4d24
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/insert-delay-alu-literal.mir
@@ -0,0 +1,175 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs -run-pass=amdgpu-insert-delay-alu %s -o - | FileCheck %s
+
+---
+name: valu_dep_1
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: valu_dep_1
+ ; CHECK: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: S_DELAY_ALU .id0_VALU_DEP_1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: valu_dep_2
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: valu_dep_2
+ ; CHECK: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+ ; CHECK-NEXT: S_DELAY_ALU .id0_VALU_DEP_2
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: valu_dep_3
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: valu_dep_3
+ ; CHECK: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+ ; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
+ ; CHECK-NEXT: S_DELAY_ALU .id0_VALU_DEP_3
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+ $vgpr2 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: valu_dep_4
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: valu_dep_4
+ ; CHECK: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+ ; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
+ ; CHECK-NEXT: $vgpr3 = V_ADD_U32_e32 $vgpr3, $vgpr3, implicit $exec
+ ; CHECK-NEXT: S_DELAY_ALU .id0_VALU_DEP_4
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+ $vgpr2 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
+ $vgpr3 = V_ADD_U32_e32 $vgpr3, $vgpr3, implicit $exec
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: trans32_dep_1
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: trans32_dep_1
+ ; CHECK: $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
+ ; CHECK-NEXT: S_DELAY_ALU .id0_TRANS32_DEP_1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: trans32_dep_2
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: trans32_dep_2
+ ; CHECK: $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
+ ; CHECK-NEXT: $vgpr1 = V_EXP_F32_e32 $vgpr1, implicit $exec, implicit $mode
+ ; CHECK-NEXT: S_DELAY_ALU .id0_TRANS32_DEP_2
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
+ $vgpr1 = V_EXP_F32_e32 $vgpr1, implicit $exec, implicit $mode
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: trans32_dep_3
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: trans32_dep_3
+ ; CHECK: $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
+ ; CHECK-NEXT: $vgpr1 = V_EXP_F32_e32 $vgpr1, implicit $exec, implicit $mode
+ ; CHECK-NEXT: $vgpr2 = V_EXP_F32_e32 $vgpr2, implicit $exec, implicit $mode
+ ; CHECK-NEXT: S_DELAY_ALU .id0_TRANS32_DEP_3
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
+ $vgpr1 = V_EXP_F32_e32 $vgpr1, implicit $exec, implicit $mode
+ $vgpr2 = V_EXP_F32_e32 $vgpr2, implicit $exec, implicit $mode
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: salu_cycle_1
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: salu_cycle_1
+ ; CHECK: $sgpr0 = S_MOV_B32 0
+ ; CHECK-NEXT: S_DELAY_ALU .id0_SALU_CYCLE_1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $sgpr0, $vgpr0, implicit $exec
+ $sgpr0 = S_MOV_B32 0
+ $vgpr0 = V_ADD_U32_e32 $sgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: valu_dep_1_same_trans32_dep_1
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: valu_dep_1_same_trans32_dep_1
+ ; CHECK: $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
+ ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+ ; CHECK-NEXT: S_DELAY_ALU .id0_TRANS32_DEP_1_skip_SAME_id1_VALU_DEP_1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+ $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
+ $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+...
+
+---
+name: valu_dep_1_same_salu_cycle_1
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: valu_dep_1_same_salu_cycle_1
+ ; CHECK: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: $sgpr0 = S_MOV_B32 0
+ ; CHECK-NEXT: S_DELAY_ALU .id0_VALU_DEP_1_skip_SAME_id1_SALU_CYCLE_1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $sgpr0, $vgpr0, implicit $exec
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $sgpr0 = S_MOV_B32 0
+ $vgpr0 = V_ADD_U32_e32 $sgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: valu_dep_1_next_valu_dep_1
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: valu_dep_1_next_valu_dep_1
+ ; CHECK: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: S_DELAY_ALU .id0_VALU_DEP_1_skip_NEXT_id1_VALU_DEP_1
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: valu_dep_2_next_valu_dep_2
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: valu_dep_2_next_valu_dep_2
+ ; CHECK: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+ ; CHECK-NEXT: S_DELAY_ALU .id0_VALU_DEP_2_skip_NEXT_id1_VALU_DEP_2
+ ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+...
+
diff --git a/llvm/test/CodeGen/AMDGPU/insert-delay-alu-parse.mir b/llvm/test/CodeGen/AMDGPU/insert-delay-alu-parse.mir
new file mode 100644
index 0000000000000..0d264629d4b72
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/insert-delay-alu-parse.mir
@@ -0,0 +1,198 @@
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs -start-after=amdgpu-insert-delay-alu %s -o - | FileCheck %s
+
+---
+name: valu_dep_1
+body: |
+ bb.0:
+ ; CHECK-LABEL: {{^}}valu_dep_1:
+ ; CHECK: %bb.0:
+ ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
+ ; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
+ ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ S_DELAY_ALU .id0_VALU_DEP_1
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: valu_dep_2
+body: |
+ bb.0:
+ ; CHECK-LABEL: {{^}}valu_dep_2:
+ ; CHECK: %bb.0:
+ ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
+ ; CHECK-NEXT: v_add_nc_u32_e32 v1, v1, v1
+ ; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_2)
+ ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+ S_DELAY_ALU .id0_VALU_DEP_2
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: valu_dep_3
+body: |
+ bb.0:
+ ; CHECK-LABEL: {{^}}valu_dep_3:
+ ; CHECK: %bb.0:
+ ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
+ ; CHECK-NEXT: v_add_nc_u32_e32 v1, v1, v1
+ ; CHECK-NEXT: v_add_nc_u32_e32 v2, v2, v2
+ ; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_3)
+ ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+ $vgpr2 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
+ S_DELAY_ALU .id0_VALU_DEP_3
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: valu_dep_4
+body: |
+ bb.0:
+ ; CHECK-LABEL: {{^}}valu_dep_4:
+ ; CHECK: %bb.0:
+ ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
+ ; CHECK-NEXT: v_add_nc_u32_e32 v1, v1, v1
+ ; CHECK-NEXT: v_add_nc_u32_e32 v2, v2, v2
+ ; CHECK-NEXT: v_add_nc_u32_e32 v3, v3, v3
+ ; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_4)
+ ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+ $vgpr2 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
+ $vgpr3 = V_ADD_U32_e32 $vgpr3, $vgpr3, implicit $exec
+ S_DELAY_ALU .id0_VALU_DEP_4
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: trans32_dep_1
+body: |
+ bb.0:
+ ; CHECK-LABEL: {{^}}trans32_dep_1:
+ ; CHECK: %bb.0:
+ ; CHECK-NEXT: v_exp_f32_e32 v0, v0
+ ; CHECK-NEXT: s_delay_alu instid0(TRANS32_DEP_1)
+ ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
+ $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
+ S_DELAY_ALU .id0_TRANS32_DEP_1
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: trans32_dep_2
+body: |
+ bb.0:
+ ; CHECK-LABEL: {{^}}trans32_dep_2:
+ ; CHECK: %bb.0:
+ ; CHECK-NEXT: v_exp_f32_e32 v0, v0
+ ; CHECK-NEXT: v_exp_f32_e32 v1, v1
+ ; CHECK-NEXT: s_delay_alu instid0(TRANS32_DEP_2)
+ ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
+ $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
+ $vgpr1 = V_EXP_F32_e32 $vgpr1, implicit $exec, implicit $mode
+ S_DELAY_ALU .id0_TRANS32_DEP_2
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: trans32_dep_3
+body: |
+ bb.0:
+ ; CHECK-LABEL: {{^}}trans32_dep_3:
+ ; CHECK: %bb.0:
+ ; CHECK-NEXT: v_exp_f32_e32 v0, v0
+ ; CHECK-NEXT: v_exp_f32_e32 v1, v1
+ ; CHECK-NEXT: v_exp_f32_e32 v2, v2
+ ; CHECK-NEXT: s_delay_alu instid0(TRANS32_DEP_3)
+ ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
+ $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
+ $vgpr1 = V_EXP_F32_e32 $vgpr1, implicit $exec, implicit $mode
+ $vgpr2 = V_EXP_F32_e32 $vgpr2, implicit $exec, implicit $mode
+ S_DELAY_ALU .id0_TRANS32_DEP_3
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: salu_cycle_1
+body: |
+ bb.0:
+ ; CHECK-LABEL: {{^}}salu_cycle_1:
+ ; CHECK: %bb.0:
+ ; CHECK-NEXT: s_mov_b32 s0, 0
+ ; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+ ; CHECK-NEXT: v_add_nc_u32_e32 v0, s0, v0
+ $sgpr0 = S_MOV_B32 0
+ S_DELAY_ALU .id0_SALU_CYCLE_1
+ $vgpr0 = V_ADD_U32_e32 $sgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: valu_dep_1_same_trans32_dep_1
+body: |
+ bb.0:
+ ; CHECK-LABEL: {{^}}valu_dep_1_same_trans32_dep_1:
+ ; CHECK: %bb.0:
+ ; CHECK-NEXT: v_exp_f32_e32 v0, v0
+ ; CHECK-NEXT: v_add_nc_u32_e32 v1, v1, v1
+ ; CHECK-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instid1(VALU_DEP_1)
+ ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v1
+ $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
+ $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+ S_DELAY_ALU .id0_TRANS32_DEP_1_skip_SAME_id1_VALU_DEP_1
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+...
+
+---
+name: valu_dep_1_same_salu_cycle_1
+body: |
+ bb.0:
+ ; CHECK-LABEL: {{^}}valu_dep_1_same_salu_cycle_1:
+ ; CHECK: %bb.0:
+ ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
+ ; CHECK-NEXT: s_mov_b32 s0, 0
+ ; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
+ ; CHECK-NEXT: v_add_nc_u32_e32 v0, s0, v0
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $sgpr0 = S_MOV_B32 0
+ S_DELAY_ALU .id0_VALU_DEP_1_skip_SAME_id1_SALU_CYCLE_1
+ $vgpr0 = V_ADD_U32_e32 $sgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: valu_dep_1_next_valu_dep_1
+body: |
+ bb.0:
+ ; CHECK-LABEL: {{^}}valu_dep_1_next_valu_dep_1:
+ ; CHECK: %bb.0:
+ ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
+ ; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+ ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
+ ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ S_DELAY_ALU .id0_VALU_DEP_1_skip_NEXT_id1_VALU_DEP_1
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: valu_dep_2_next_valu_dep_2
+body: |
+ bb.0:
+ ; CHECK-LABEL: {{^}}valu_dep_2_next_valu_dep_2:
+ ; CHECK: %bb.0:
+ ; CHEC...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/96004
More information about the llvm-commits
mailing list