[llvm] [AMDGPU] Add GFX12 S_WAIT_* instructions (PR #77336)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 8 08:29:45 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-mc
@llvm/pr-subscribers-backend-amdgpu
Author: Jay Foad (jayfoad)
<details>
<summary>Changes</summary>
GFX12 has a separate wait instruction per counter, e.g. S_WAIT_LOADCNT. S_WAITCNT still exists but is deprecated, and codegen should stop using it. The S_WAITCNT_* instructions (e.g. S_WAITCNT_VSCNT) are removed on GFX12.
This patch adds MC layer support for the new S_WAIT_* instructions and removes MC layer support for the S_WAITCNT_* instructions on GFX12.
---
Full diff: https://github.com/llvm/llvm-project/pull/77336.diff
6 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/SOPInstructions.td (+35-7)
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_err.s (+5)
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_sopp.s (+54)
- (modified) llvm/test/MC/AMDGPU/gfx12_unsupported.s (+12)
- (modified) llvm/test/MC/Disassembler/AMDGPU/decode-err.txt (+4)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sopp.txt (+54)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index c9687ac368d305..1b17dea7ba88a9 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -1186,14 +1186,12 @@ let SubtargetPredicate = isGFX10Plus in {
let SubtargetPredicate = isGFX10GFX11 in {
def S_SUBVECTOR_LOOP_BEGIN : SOPK_32_BR<"s_subvector_loop_begin">;
def S_SUBVECTOR_LOOP_END : SOPK_32_BR<"s_subvector_loop_end">;
-} // End SubtargetPredicate = isGFX10GFX11
-let SubtargetPredicate = isGFX10Plus in {
def S_WAITCNT_VSCNT : SOPK_WAITCNT<"s_waitcnt_vscnt">;
def S_WAITCNT_VMCNT : SOPK_WAITCNT<"s_waitcnt_vmcnt">;
def S_WAITCNT_EXPCNT : SOPK_WAITCNT<"s_waitcnt_expcnt">;
def S_WAITCNT_LGKMCNT : SOPK_WAITCNT<"s_waitcnt_lgkmcnt">;
-} // End SubtargetPredicate = isGFX10Plus
+} // End SubtargetPredicate = isGFX10GFX11
//===----------------------------------------------------------------------===//
// SOPC Instructions
@@ -1702,6 +1700,27 @@ let SubtargetPredicate = HasVGPRSingleUseHintInsts in {
SOPP_Pseudo<"s_singleuse_vdst", (ins s16imm:$simm16), "$simm16">;
} // End SubtargetPredicate = HasVGPRSingeUseHintInsts
+let SubtargetPredicate = isGFX12Plus, hasSideEffects = 1 in {
+ def S_WAIT_LOADCNT :
+ SOPP_Pseudo<"s_wait_loadcnt", (ins s16imm:$simm16), "$simm16">;
+ def S_WAIT_LOADCNT_DSCNT :
+ SOPP_Pseudo<"s_wait_loadcnt_dscnt", (ins s16imm:$simm16), "$simm16">;
+ def S_WAIT_STORECNT :
+ SOPP_Pseudo<"s_wait_storecnt", (ins s16imm:$simm16), "$simm16">;
+ def S_WAIT_STORECNT_DSCNT :
+ SOPP_Pseudo<"s_wait_storecnt_dscnt", (ins s16imm:$simm16), "$simm16">;
+ def S_WAIT_SAMPLECNT :
+ SOPP_Pseudo<"s_wait_samplecnt", (ins s16imm:$simm16), "$simm16">;
+ def S_WAIT_BVHCNT :
+ SOPP_Pseudo<"s_wait_bvhcnt", (ins s16imm:$simm16), "$simm16">;
+ def S_WAIT_EXPCNT :
+ SOPP_Pseudo<"s_wait_expcnt", (ins s16imm:$simm16), "$simm16">;
+ def S_WAIT_DSCNT :
+ SOPP_Pseudo<"s_wait_dscnt", (ins s16imm:$simm16), "$simm16">;
+ def S_WAIT_KMCNT :
+ SOPP_Pseudo<"s_wait_kmcnt", (ins s16imm:$simm16), "$simm16">;
+} // End SubtargetPredicate = isGFX12Plus, hasSideEffects = 1
+
//===----------------------------------------------------------------------===//
// SOP1 Patterns
//===----------------------------------------------------------------------===//
@@ -2411,10 +2430,10 @@ defm S_SETREG_IMM32_B32 : SOPK_Real64_gfx11_gfx12<0x013>;
defm S_CALL_B64 : SOPK_Real32_gfx11_gfx12<0x014>;
defm S_SUBVECTOR_LOOP_BEGIN : SOPK_Real32_gfx11<0x016>;
defm S_SUBVECTOR_LOOP_END : SOPK_Real32_gfx11<0x017>;
-defm S_WAITCNT_VSCNT : SOPK_Real32_gfx11_gfx12<0x018>;
-defm S_WAITCNT_VMCNT : SOPK_Real32_gfx11_gfx12<0x019>;
-defm S_WAITCNT_EXPCNT : SOPK_Real32_gfx11_gfx12<0x01a>;
-defm S_WAITCNT_LGKMCNT : SOPK_Real32_gfx11_gfx12<0x01b>;
+defm S_WAITCNT_VSCNT : SOPK_Real32_gfx11<0x018>;
+defm S_WAITCNT_VMCNT : SOPK_Real32_gfx11<0x019>;
+defm S_WAITCNT_EXPCNT : SOPK_Real32_gfx11<0x01a>;
+defm S_WAITCNT_LGKMCNT : SOPK_Real32_gfx11<0x01b>;
//===----------------------------------------------------------------------===//
// SOPK - GFX10.
@@ -2516,6 +2535,15 @@ multiclass SOPP_Real_32_Renamed_gfx12<bits<7> op, SOPP_Pseudo backing_pseudo, st
defm S_WAIT_ALU : SOPP_Real_32_Renamed_gfx12<0x008, S_WAITCNT_DEPCTR, "s_wait_alu">;
defm S_BARRIER_WAIT : SOPP_Real_32_gfx12<0x014>;
defm S_BARRIER_LEAVE : SOPP_Real_32_gfx12<0x015>;
+defm S_WAIT_LOADCNT : SOPP_Real_32_gfx12<0x040>;
+defm S_WAIT_STORECNT : SOPP_Real_32_gfx12<0x041>;
+defm S_WAIT_SAMPLECNT : SOPP_Real_32_gfx12<0x042>;
+defm S_WAIT_BVHCNT : SOPP_Real_32_gfx12<0x043>;
+defm S_WAIT_EXPCNT : SOPP_Real_32_gfx12<0x044>;
+defm S_WAIT_DSCNT : SOPP_Real_32_gfx12<0x046>;
+defm S_WAIT_KMCNT : SOPP_Real_32_gfx12<0x047>;
+defm S_WAIT_LOADCNT_DSCNT : SOPP_Real_32_gfx12<0x048>;
+defm S_WAIT_STORECNT_DSCNT : SOPP_Real_32_gfx12<0x049>;
//===----------------------------------------------------------------------===//
// SOPP - GFX11, GFX12.
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_err.s b/llvm/test/MC/AMDGPU/gfx11_asm_err.s
index 088ee416692b88..916d6f05dab534 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_err.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_err.s
@@ -36,6 +36,11 @@ v_interp_p2_f32 v0, -v1, v2, v3 wait_exp
global_atomic_cmpswap_x2 v[1:4], v3, v[5:8], off offset:2047 glc
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// s_waitcnt_depctr is called s_wait_alu on GFX12, but its semantics and
+// encoding are identical. Even so, the new name should be rejected on GFX11
+s_wait_alu 0xfffe
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
v_cubesc_f32_e64_dpp v5, v1, v2, 12345678 row_shr:4 row_mask:0xf bank_mask:0xf
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_sopp.s b/llvm/test/MC/AMDGPU/gfx12_asm_sopp.s
index cf78b87a476183..41ed4de6be8a72 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_sopp.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_sopp.s
@@ -1,5 +1,59 @@
// RUN: llvm-mc -arch=amdgcn -show-encoding -mcpu=gfx1200 %s | FileCheck --check-prefix=GFX12 %s
+s_wait_loadcnt 0x1234
+// GFX12: encoding: [0x34,0x12,0xc0,0xbf]
+
+s_wait_loadcnt 0xc1d1
+// GFX12: encoding: [0xd1,0xc1,0xc0,0xbf]
+
+s_wait_storecnt 0x1234
+// GFX12: encoding: [0x34,0x12,0xc1,0xbf]
+
+s_wait_storecnt 0xc1d1
+// GFX12: encoding: [0xd1,0xc1,0xc1,0xbf]
+
+s_wait_samplecnt 0x1234
+// GFX12: encoding: [0x34,0x12,0xc2,0xbf]
+
+s_wait_samplecnt 0xc1d1
+// GFX12: encoding: [0xd1,0xc1,0xc2,0xbf]
+
+s_wait_bvhcnt 0x1234
+// GFX12: encoding: [0x34,0x12,0xc3,0xbf]
+
+s_wait_bvhcnt 0xc1d1
+// GFX12: encoding: [0xd1,0xc1,0xc3,0xbf]
+
+s_wait_expcnt 0x1234
+// GFX12: encoding: [0x34,0x12,0xc4,0xbf]
+
+s_wait_expcnt 0xc1d1
+// GFX12: encoding: [0xd1,0xc1,0xc4,0xbf]
+
+s_wait_dscnt 0x1234
+// GFX12: encoding: [0x34,0x12,0xc6,0xbf]
+
+s_wait_dscnt 0xc1d1
+// GFX12: encoding: [0xd1,0xc1,0xc6,0xbf]
+
+s_wait_kmcnt 0x1234
+// GFX12: encoding: [0x34,0x12,0xc7,0xbf]
+
+s_wait_kmcnt 0xc1d1
+// GFX12: encoding: [0xd1,0xc1,0xc7,0xbf]
+
+s_wait_loadcnt_dscnt 0x1234
+// GFX12: encoding: [0x34,0x12,0xc8,0xbf]
+
+s_wait_loadcnt_dscnt 0xc1d1
+// GFX12: encoding: [0xd1,0xc1,0xc8,0xbf]
+
+s_wait_storecnt_dscnt 0x1234
+// GFX12: encoding: [0x34,0x12,0xc9,0xbf]
+
+s_wait_storecnt_dscnt 0xc1d1
+// GFX12: encoding: [0xd1,0xc1,0xc9,0xbf]
+
s_wait_alu 0xfffe
// GFX12: encoding: [0xfe,0xff,0x88,0xbf]
diff --git a/llvm/test/MC/AMDGPU/gfx12_unsupported.s b/llvm/test/MC/AMDGPU/gfx12_unsupported.s
index 44c85b8545c510..aabaf526dc2a86 100644
--- a/llvm/test/MC/AMDGPU/gfx12_unsupported.s
+++ b/llvm/test/MC/AMDGPU/gfx12_unsupported.s
@@ -4,6 +4,18 @@
// Unsupported instructions.
//===----------------------------------------------------------------------===//
+s_waitcnt_expcnt exec_hi, 0x1234
+// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+s_waitcnt_lgkmcnt exec_hi, 0x1234
+// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+s_waitcnt_vmcnt exec_hi, 0x1234
+// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+s_waitcnt_vscnt exec_hi, 0x1234
+// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
s_subvector_loop_begin s0, 0x1234
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/Disassembler/AMDGPU/decode-err.txt b/llvm/test/MC/Disassembler/AMDGPU/decode-err.txt
index e1bb7ad5117158..d7417929a8e1b0 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/decode-err.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/decode-err.txt
@@ -10,6 +10,10 @@
# GFX11: [[@LINE+1]]:1: warning: invalid instruction encoding
0x34,0x12,0x93,0xbf
+# this is s_waitcnt_vscnt exec_hi, 0x1234, which is valid on gfx11, but not on gfx12
+# GFX12: [[@LINE+1]]:1: warning: invalid instruction encoding
+0x34,0x12,0x7f,0xbc
+
# W32: v_dual_add_f32 v5, 0xaf123456, v2 :: v_dual_fmaak_f32 v6, v3, v1, 0xaf123456 ; encoding: [0xff,0x04,0x02,0xc9,0x03,0x03,0x06,0x05,0x56,0x34,0x12,0xaf]
# W64: [[@LINE+1]]:1: warning: invalid instruction encoding
0xff,0x04,0x02,0xc9,0x03,0x03,0x06,0x05,0x56,0x34,0x12,0xaf
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sopp.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sopp.txt
index 13ded15998fb22..6f4dc2423487e2 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sopp.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sopp.txt
@@ -6,6 +6,60 @@
# GFX12: s_wait_alu 0xfffe ; encoding: [0xfe,0xff,0x88,0xbf]
0xfe,0xff,0x88,0xbf
+# GFX12: s_wait_loadcnt 0x1234 ; encoding: [0x34,0x12,0xc0,0xbf]
+0x34,0x12,0xc0,0xbf
+
+# GFX12: s_wait_loadcnt 0xc1d1 ; encoding: [0xd1,0xc1,0xc0,0xbf]
+0xd1,0xc1,0xc0,0xbf
+
+# GFX12: s_wait_storecnt 0x1234 ; encoding: [0x34,0x12,0xc1,0xbf]
+0x34,0x12,0xc1,0xbf
+
+# GFX12: s_wait_storecnt 0xc1d1 ; encoding: [0xd1,0xc1,0xc1,0xbf]
+0xd1,0xc1,0xc1,0xbf
+
+# GFX12: s_wait_samplecnt 0x1234 ; encoding: [0x34,0x12,0xc2,0xbf]
+0x34,0x12,0xc2,0xbf
+
+# GFX12: s_wait_samplecnt 0xc1d1 ; encoding: [0xd1,0xc1,0xc2,0xbf]
+0xd1,0xc1,0xc2,0xbf
+
+# GFX12: s_wait_bvhcnt 0x1234 ; encoding: [0x34,0x12,0xc3,0xbf]
+0x34,0x12,0xc3,0xbf
+
+# GFX12: s_wait_bvhcnt 0xc1d1 ; encoding: [0xd1,0xc1,0xc3,0xbf]
+0xd1,0xc1,0xc3,0xbf
+
+# GFX12: s_wait_expcnt 0x1234 ; encoding: [0x34,0x12,0xc4,0xbf]
+0x34,0x12,0xc4,0xbf
+
+# GFX12: s_wait_expcnt 0xc1d1 ; encoding: [0xd1,0xc1,0xc4,0xbf]
+0xd1,0xc1,0xc4,0xbf
+
+# GFX12: s_wait_dscnt 0x1234 ; encoding: [0x34,0x12,0xc6,0xbf]
+0x34,0x12,0xc6,0xbf
+
+# GFX12: s_wait_dscnt 0xc1d1 ; encoding: [0xd1,0xc1,0xc6,0xbf]
+0xd1,0xc1,0xc6,0xbf
+
+# GFX12: s_wait_kmcnt 0x1234 ; encoding: [0x34,0x12,0xc7,0xbf]
+0x34,0x12,0xc7,0xbf
+
+# GFX12: s_wait_kmcnt 0xc1d1 ; encoding: [0xd1,0xc1,0xc7,0xbf]
+0xd1,0xc1,0xc7,0xbf
+
+# GFX12: s_wait_loadcnt_dscnt 0x1234 ; encoding: [0x34,0x12,0xc8,0xbf]
+0x34,0x12,0xc8,0xbf
+
+# GFX12: s_wait_loadcnt_dscnt 0xc1d1 ; encoding: [0xd1,0xc1,0xc8,0xbf]
+0xd1,0xc1,0xc8,0xbf
+
+# GFX12: s_wait_storecnt_dscnt 0x1234 ; encoding: [0x34,0x12,0xc9,0xbf]
+0x34,0x12,0xc9,0xbf
+
+# GFX12: s_wait_storecnt_dscnt 0xc1d1 ; encoding: [0xd1,0xc1,0xc9,0xbf]
+0xd1,0xc1,0xc9,0xbf
+
# GFX12: s_singleuse_vdst 0x0 ; encoding: [0x00,0x00,0x93,0xbf]
0x00,0x00,0x93,0xbf
``````````
</details>
https://github.com/llvm/llvm-project/pull/77336
More information about the llvm-commits
mailing list