[llvm] AMDGPU/gfx12: avoid crashing on legacy waitcnt intrinsics (PR #92306)
via llvm-commits
llvm-commits at lists.llvm.org
Wed May 15 12:38:28 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Nicolai Hähnle (nhaehnle)
<details>
<summary>Changes</summary>
The legacy waitcnt intrinsics (`S_WAITCNT`) *are* still accepted by the HW on gfx12, but they have a conservative effect.
Leave them untouched, since handling them would complicate the logic a bit, and developers who code at such a low level really need to revisit what they're doing anyway.
Change-Id: Ib018ff640f6c36f3888c9473a5eb4ab33fc42ed2
---
Full diff: https://github.com/llvm/llvm-project/pull/92306.diff
2 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp (+5)
- (modified) llvm/test/CodeGen/AMDGPU/waitcnt-preexisting.mir (+175)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 839ac927a0ee4..5577ce9eb1282 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -1364,6 +1364,11 @@ bool WaitcntGeneratorGFX12Plus::applyPreexistingWaitcnt(
unsigned Opcode = SIInstrInfo::getNonSoftWaitcntOpcode(II.getOpcode());
bool TrySimplify = Opcode != II.getOpcode() && !OptNone;
+ // Don't crash if the programmer used legacy waitcnt intrinsics, but don't
+ // attempt to do more than that either.
+ if (Opcode == AMDGPU::S_WAITCNT)
+ continue;
+
if (Opcode == AMDGPU::S_WAIT_LOADCNT_DSCNT) {
unsigned OldEnc =
TII->getNamedOperand(II, AMDGPU::OpName::simm16)->getImm();
diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-preexisting.mir b/llvm/test/CodeGen/AMDGPU/waitcnt-preexisting.mir
index 4c01786e45f55..e15814210dfd9 100644
--- a/llvm/test/CodeGen/AMDGPU/waitcnt-preexisting.mir
+++ b/llvm/test/CodeGen/AMDGPU/waitcnt-preexisting.mir
@@ -1,5 +1,12 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs -run-pass si-insert-waitcnts -o - %s | FileCheck -check-prefixes=GFX9 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass si-insert-waitcnts -o - %s | FileCheck -check-prefixes=GFX12 %s
+
+# For gfx12+, this test simply ensures that we don't crash in the face of manually
+# inserted waitcnt intrinsics. They are still allowed for compatibility, but
+# their effect in the HW is very conservative and code generation does not attempt
+# to do anything with them. Developers who write code at such a low level should
+# revisit their code for gfx12+ anyway.
---
name: test_waitcnt_preexisting_lgkmcnt_unmodified
@@ -17,6 +24,22 @@ body: |
; GFX9-NEXT: S_WAITCNT 112
; GFX9-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
; GFX9-NEXT: S_ENDPGM 0
+ ;
+ ; GFX12-LABEL: name: test_waitcnt_preexisting_lgkmcnt_unmodified
+ ; GFX12: liveins: $vgpr0
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
+ ; GFX12-NEXT: S_WAIT_EXPCNT 0
+ ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
+ ; GFX12-NEXT: S_WAIT_BVHCNT 0
+ ; GFX12-NEXT: S_WAIT_KMCNT 0
+ ; GFX12-NEXT: $vgpr0_vgpr1 = DS_READ2_B32 $vgpr0, 0, 1, 0, implicit $m0, implicit $exec
+ ; GFX12-NEXT: S_WAITCNT 49279
+ ; GFX12-NEXT: S_WAIT_DSCNT 0
+ ; GFX12-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+ ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
+ ; GFX12-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GFX12-NEXT: S_ENDPGM 0
$vgpr0_vgpr1 = DS_READ2_B32 $vgpr0, 0, 1, 0, implicit $m0, implicit $exec
S_WAITCNT 49279
$vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
@@ -40,6 +63,22 @@ body: |
; GFX9-NEXT: S_WAITCNT 112
; GFX9-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
; GFX9-NEXT: S_ENDPGM 0
+ ;
+ ; GFX12-LABEL: name: test_waitcnt_preexisting_vmcnt_unmodified
+ ; GFX12: liveins: $vgpr0_vgpr1
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
+ ; GFX12-NEXT: S_WAIT_EXPCNT 0
+ ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
+ ; GFX12-NEXT: S_WAIT_BVHCNT 0
+ ; GFX12-NEXT: S_WAIT_KMCNT 0
+ ; GFX12-NEXT: $vgpr0_vgpr1 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
+ ; GFX12-NEXT: S_WAITCNT 3952
+ ; GFX12-NEXT: S_WAIT_LOADCNT 0
+ ; GFX12-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+ ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
+ ; GFX12-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GFX12-NEXT: S_ENDPGM 0
$vgpr0_vgpr1 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
S_WAITCNT 3952
$vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
@@ -65,6 +104,22 @@ body: |
; GFX9-NEXT: S_WAITCNT 112
; GFX9-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
; GFX9-NEXT: S_ENDPGM 0
+ ;
+ ; GFX12-LABEL: name: test_waitcnt_preexisting_vmcnt_needs_lgkmcnt
+ ; GFX12: liveins: $vgpr0
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
+ ; GFX12-NEXT: S_WAIT_EXPCNT 0
+ ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
+ ; GFX12-NEXT: S_WAIT_BVHCNT 0
+ ; GFX12-NEXT: S_WAIT_KMCNT 0
+ ; GFX12-NEXT: $vgpr0_vgpr1 = DS_READ2_B32 $vgpr0, 0, 1, 0, implicit $m0, implicit $exec
+ ; GFX12-NEXT: S_WAITCNT 3952
+ ; GFX12-NEXT: S_WAIT_DSCNT 0
+ ; GFX12-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+ ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
+ ; GFX12-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GFX12-NEXT: S_ENDPGM 0
$vgpr0_vgpr1 = DS_READ2_B32 $vgpr0, 0, 1, 0, implicit $m0, implicit $exec
S_WAITCNT 3952
$vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
@@ -88,6 +143,22 @@ body: |
; GFX9-NEXT: S_WAITCNT 112
; GFX9-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
; GFX9-NEXT: S_ENDPGM 0
+ ;
+ ; GFX12-LABEL: name: test_waitcnt_preexisting_lgkmcnt_needs_vmcnt
+ ; GFX12: liveins: $vgpr0_vgpr1
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
+ ; GFX12-NEXT: S_WAIT_EXPCNT 0
+ ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
+ ; GFX12-NEXT: S_WAIT_BVHCNT 0
+ ; GFX12-NEXT: S_WAIT_KMCNT 0
+ ; GFX12-NEXT: $vgpr0_vgpr1 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
+ ; GFX12-NEXT: S_WAITCNT 49279
+ ; GFX12-NEXT: S_WAIT_LOADCNT 0
+ ; GFX12-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+ ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
+ ; GFX12-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GFX12-NEXT: S_ENDPGM 0
$vgpr0_vgpr1 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
S_WAITCNT 49279
$vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
@@ -115,6 +186,24 @@ body: |
; GFX9-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
; GFX9-NEXT: S_WAITCNT 112
; GFX9-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
+ ;
+ ; GFX12-LABEL: name: test_waitcnt_preexisting_apply_all_counters
+ ; GFX12: liveins: $vgpr0_vgpr1, $vgpr2
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
+ ; GFX12-NEXT: S_WAIT_EXPCNT 0
+ ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
+ ; GFX12-NEXT: S_WAIT_BVHCNT 0
+ ; GFX12-NEXT: S_WAIT_KMCNT 0
+ ; GFX12-NEXT: $vgpr4_vgpr5 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
+ ; GFX12-NEXT: $vgpr6_vgpr7 = DS_READ2_B32 $vgpr2, 0, 1, 0, implicit $m0, implicit $exec
+ ; GFX12-NEXT: S_WAITCNT 0
+ ; GFX12-NEXT: S_WAIT_DSCNT 0
+ ; GFX12-NEXT: $vgpr6 = V_OR_B32_e32 1, killed $vgpr6, implicit $exec
+ ; GFX12-NEXT: S_WAIT_LOADCNT 0
+ ; GFX12-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
+ ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
+ ; GFX12-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr4_vgpr5 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
$vgpr6_vgpr7 = DS_READ2_B32 $vgpr2, 0, 1, 0, implicit $m0, implicit $exec
S_WAITCNT 0
@@ -136,6 +225,24 @@ body: |
; GFX9-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
; GFX9-NEXT: S_WAITCNT 0
; GFX9-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
+ ;
+ ; GFX12-LABEL: name: test_waitcnt_preexisting_combine_waitcnt
+ ; GFX12: liveins: $vgpr0_vgpr1
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
+ ; GFX12-NEXT: S_WAIT_EXPCNT 0
+ ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
+ ; GFX12-NEXT: S_WAIT_BVHCNT 0
+ ; GFX12-NEXT: S_WAIT_KMCNT 0
+ ; GFX12-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+ ; GFX12-NEXT: S_WAITCNT 0
+ ; GFX12-NEXT: S_WAITCNT 0
+ ; GFX12-NEXT: S_WAITCNT 0
+ ; GFX12-NEXT: S_WAITCNT 0
+ ; GFX12-NEXT: S_WAITCNT 0
+ ; GFX12-NEXT: S_WAITCNT 0
+ ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
+ ; GFX12-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
S_WAITCNT 0
S_WAITCNT 0
@@ -159,6 +266,20 @@ body: |
; GFX9-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
; GFX9-NEXT: S_WAITCNT 112
; GFX9-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
+ ;
+ ; GFX12-LABEL: name: test_waitcnt_preexisting_combine_waitcnt_diff_counters
+ ; GFX12: liveins: $vgpr0_vgpr1
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
+ ; GFX12-NEXT: S_WAIT_EXPCNT 0
+ ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
+ ; GFX12-NEXT: S_WAIT_BVHCNT 0
+ ; GFX12-NEXT: S_WAIT_KMCNT 0
+ ; GFX12-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+ ; GFX12-NEXT: S_WAITCNT 49279
+ ; GFX12-NEXT: S_WAITCNT 3952
+ ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
+ ; GFX12-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
S_WAITCNT 49279
S_WAITCNT 3952
@@ -185,6 +306,23 @@ body: |
; GFX9-NEXT: S_NOP 0
; GFX9-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
; GFX9-NEXT: S_ENDPGM 0
+ ;
+ ; GFX12-LABEL: name: test_waitcnt_preexisting_early_wait
+ ; GFX12: liveins: $vgpr0_vgpr1
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
+ ; GFX12-NEXT: S_WAIT_EXPCNT 0
+ ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
+ ; GFX12-NEXT: S_WAIT_BVHCNT 0
+ ; GFX12-NEXT: S_WAIT_KMCNT 0
+ ; GFX12-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+ ; GFX12-NEXT: S_WAITCNT 0
+ ; GFX12-NEXT: S_NOP 0
+ ; GFX12-NEXT: S_NOP 0
+ ; GFX12-NEXT: S_NOP 0
+ ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
+ ; GFX12-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GFX12-NEXT: S_ENDPGM 0
$vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
S_WAITCNT 0
S_NOP 0
@@ -207,6 +345,18 @@ body: |
; GFX9-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
; GFX9-NEXT: S_WAITCNT 3952
; GFX9-NEXT: KILL $vgpr0
+ ;
+ ; GFX12-LABEL: name: test_waitcnt_preexisting_ignore_kill
+ ; GFX12: liveins: $vgpr0_vgpr1
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
+ ; GFX12-NEXT: S_WAIT_EXPCNT 0
+ ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
+ ; GFX12-NEXT: S_WAIT_BVHCNT 0
+ ; GFX12-NEXT: S_WAIT_KMCNT 0
+ ; GFX12-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+ ; GFX12-NEXT: S_WAITCNT 3952
+ ; GFX12-NEXT: KILL $vgpr0
$vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
S_WAITCNT 3952
KILL $vgpr0
@@ -221,6 +371,15 @@ body: |
; GFX9-LABEL: name: test_waitcnt_preexisting_func_start
; GFX9: S_WAITCNT 0
; GFX9-NEXT: S_ENDPGM 0
+ ;
+ ; GFX12-LABEL: name: test_waitcnt_preexisting_func_start
+ ; GFX12: S_WAIT_LOADCNT_DSCNT 0
+ ; GFX12-NEXT: S_WAIT_EXPCNT 0
+ ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
+ ; GFX12-NEXT: S_WAIT_BVHCNT 0
+ ; GFX12-NEXT: S_WAIT_KMCNT 0
+ ; GFX12-NEXT: S_WAITCNT 0
+ ; GFX12-NEXT: S_ENDPGM 0
S_WAITCNT 0
S_ENDPGM 0
...
@@ -241,6 +400,22 @@ body: |
; GFX9-NEXT: S_WAITCNT 112
; GFX9-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
; GFX9-NEXT: S_ENDPGM 0
+ ;
+ ; GFX12-LABEL: name: test_waitcnt_preexisting_buffer_inv
+ ; GFX12: S_WAIT_LOADCNT_DSCNT 0
+ ; GFX12-NEXT: S_WAIT_EXPCNT 0
+ ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
+ ; GFX12-NEXT: S_WAIT_BVHCNT 0
+ ; GFX12-NEXT: S_WAIT_KMCNT 0
+ ; GFX12-NEXT: $vgpr0_vgpr1 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
+ ; GFX12-NEXT: S_WAITCNT 3952
+ ; GFX12-NEXT: BUFFER_INVL2 implicit $exec
+ ; GFX12-NEXT: S_WAIT_LOADCNT 0
+ ; GFX12-NEXT: BUFFER_WBINVL1_VOL implicit $exec
+ ; GFX12-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+ ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
+ ; GFX12-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GFX12-NEXT: S_ENDPGM 0
$vgpr0_vgpr1 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
S_WAITCNT 3952
BUFFER_INVL2 implicit $exec
``````````
</details>
https://github.com/llvm/llvm-project/pull/92306
More information about the llvm-commits mailing list