[llvm] ec1f28d - AMDGPU/gfx12: avoid crashing on legacy waitcnt intrinsics (#92306)
via llvm-commits
llvm-commits at lists.llvm.org
Wed May 15 13:23:23 PDT 2024
Author: Nicolai Hähnle
Date: 2024-05-15T22:23:18+02:00
New Revision: ec1f28dc97ce22ba5b3e6f95ff84414dfbda46b0
URL: https://github.com/llvm/llvm-project/commit/ec1f28dc97ce22ba5b3e6f95ff84414dfbda46b0
DIFF: https://github.com/llvm/llvm-project/commit/ec1f28dc97ce22ba5b3e6f95ff84414dfbda46b0.diff
LOG: AMDGPU/gfx12: avoid crashing on legacy waitcnt intrinsics (#92306)
They *are* still accepted by the HW but have a conservative effect.
Leave them untouched since handling them would complicate the logic a
bit, and developers who code to such a low level really need to revisit
what they're doing anyway.
Added:
Modified:
llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
llvm/test/CodeGen/AMDGPU/waitcnt-preexisting.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 839ac927a0ee4..5577ce9eb1282 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -1364,6 +1364,11 @@ bool WaitcntGeneratorGFX12Plus::applyPreexistingWaitcnt(
unsigned Opcode = SIInstrInfo::getNonSoftWaitcntOpcode(II.getOpcode());
bool TrySimplify = Opcode != II.getOpcode() && !OptNone;
+ // Don't crash if the programmer used legacy waitcnt intrinsics, but don't
+ // attempt to do more than that either.
+ if (Opcode == AMDGPU::S_WAITCNT)
+ continue;
+
if (Opcode == AMDGPU::S_WAIT_LOADCNT_DSCNT) {
unsigned OldEnc =
TII->getNamedOperand(II, AMDGPU::OpName::simm16)->getImm();
diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-preexisting.mir b/llvm/test/CodeGen/AMDGPU/waitcnt-preexisting.mir
index 4c01786e45f55..e15814210dfd9 100644
--- a/llvm/test/CodeGen/AMDGPU/waitcnt-preexisting.mir
+++ b/llvm/test/CodeGen/AMDGPU/waitcnt-preexisting.mir
@@ -1,5 +1,12 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs -run-pass si-insert-waitcnts -o - %s | FileCheck -check-prefixes=GFX9 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass si-insert-waitcnts -o - %s | FileCheck -check-prefixes=GFX12 %s
+
+# For gfx12+, this test simply ensures that we don't crash in the face of manually
+# inserted waitcnt intrinsics. They are still allowed for compatibility, but
+# their effect in the HW is very conservative and code generation does not attempt
+# to do anything with them. Developers who write code at such a low level should
+# revisit their code for gfx12+ anyway.
---
name: test_waitcnt_preexisting_lgkmcnt_unmodified
@@ -17,6 +24,22 @@ body: |
; GFX9-NEXT: S_WAITCNT 112
; GFX9-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
; GFX9-NEXT: S_ENDPGM 0
+ ;
+ ; GFX12-LABEL: name: test_waitcnt_preexisting_lgkmcnt_unmodified
+ ; GFX12: liveins: $vgpr0
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
+ ; GFX12-NEXT: S_WAIT_EXPCNT 0
+ ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
+ ; GFX12-NEXT: S_WAIT_BVHCNT 0
+ ; GFX12-NEXT: S_WAIT_KMCNT 0
+ ; GFX12-NEXT: $vgpr0_vgpr1 = DS_READ2_B32 $vgpr0, 0, 1, 0, implicit $m0, implicit $exec
+ ; GFX12-NEXT: S_WAITCNT 49279
+ ; GFX12-NEXT: S_WAIT_DSCNT 0
+ ; GFX12-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+ ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
+ ; GFX12-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GFX12-NEXT: S_ENDPGM 0
$vgpr0_vgpr1 = DS_READ2_B32 $vgpr0, 0, 1, 0, implicit $m0, implicit $exec
S_WAITCNT 49279
$vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
@@ -40,6 +63,22 @@ body: |
; GFX9-NEXT: S_WAITCNT 112
; GFX9-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
; GFX9-NEXT: S_ENDPGM 0
+ ;
+ ; GFX12-LABEL: name: test_waitcnt_preexisting_vmcnt_unmodified
+ ; GFX12: liveins: $vgpr0_vgpr1
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
+ ; GFX12-NEXT: S_WAIT_EXPCNT 0
+ ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
+ ; GFX12-NEXT: S_WAIT_BVHCNT 0
+ ; GFX12-NEXT: S_WAIT_KMCNT 0
+ ; GFX12-NEXT: $vgpr0_vgpr1 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
+ ; GFX12-NEXT: S_WAITCNT 3952
+ ; GFX12-NEXT: S_WAIT_LOADCNT 0
+ ; GFX12-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+ ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
+ ; GFX12-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GFX12-NEXT: S_ENDPGM 0
$vgpr0_vgpr1 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
S_WAITCNT 3952
$vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
@@ -65,6 +104,22 @@ body: |
; GFX9-NEXT: S_WAITCNT 112
; GFX9-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
; GFX9-NEXT: S_ENDPGM 0
+ ;
+ ; GFX12-LABEL: name: test_waitcnt_preexisting_vmcnt_needs_lgkmcnt
+ ; GFX12: liveins: $vgpr0
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
+ ; GFX12-NEXT: S_WAIT_EXPCNT 0
+ ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
+ ; GFX12-NEXT: S_WAIT_BVHCNT 0
+ ; GFX12-NEXT: S_WAIT_KMCNT 0
+ ; GFX12-NEXT: $vgpr0_vgpr1 = DS_READ2_B32 $vgpr0, 0, 1, 0, implicit $m0, implicit $exec
+ ; GFX12-NEXT: S_WAITCNT 3952
+ ; GFX12-NEXT: S_WAIT_DSCNT 0
+ ; GFX12-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+ ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
+ ; GFX12-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GFX12-NEXT: S_ENDPGM 0
$vgpr0_vgpr1 = DS_READ2_B32 $vgpr0, 0, 1, 0, implicit $m0, implicit $exec
S_WAITCNT 3952
$vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
@@ -88,6 +143,22 @@ body: |
; GFX9-NEXT: S_WAITCNT 112
; GFX9-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
; GFX9-NEXT: S_ENDPGM 0
+ ;
+ ; GFX12-LABEL: name: test_waitcnt_preexisting_lgkmcnt_needs_vmcnt
+ ; GFX12: liveins: $vgpr0_vgpr1
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
+ ; GFX12-NEXT: S_WAIT_EXPCNT 0
+ ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
+ ; GFX12-NEXT: S_WAIT_BVHCNT 0
+ ; GFX12-NEXT: S_WAIT_KMCNT 0
+ ; GFX12-NEXT: $vgpr0_vgpr1 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
+ ; GFX12-NEXT: S_WAITCNT 49279
+ ; GFX12-NEXT: S_WAIT_LOADCNT 0
+ ; GFX12-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+ ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
+ ; GFX12-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GFX12-NEXT: S_ENDPGM 0
$vgpr0_vgpr1 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
S_WAITCNT 49279
$vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
@@ -115,6 +186,24 @@ body: |
; GFX9-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
; GFX9-NEXT: S_WAITCNT 112
; GFX9-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
+ ;
+ ; GFX12-LABEL: name: test_waitcnt_preexisting_apply_all_counters
+ ; GFX12: liveins: $vgpr0_vgpr1, $vgpr2
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
+ ; GFX12-NEXT: S_WAIT_EXPCNT 0
+ ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
+ ; GFX12-NEXT: S_WAIT_BVHCNT 0
+ ; GFX12-NEXT: S_WAIT_KMCNT 0
+ ; GFX12-NEXT: $vgpr4_vgpr5 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
+ ; GFX12-NEXT: $vgpr6_vgpr7 = DS_READ2_B32 $vgpr2, 0, 1, 0, implicit $m0, implicit $exec
+ ; GFX12-NEXT: S_WAITCNT 0
+ ; GFX12-NEXT: S_WAIT_DSCNT 0
+ ; GFX12-NEXT: $vgpr6 = V_OR_B32_e32 1, killed $vgpr6, implicit $exec
+ ; GFX12-NEXT: S_WAIT_LOADCNT 0
+ ; GFX12-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
+ ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
+ ; GFX12-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr4_vgpr5 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
$vgpr6_vgpr7 = DS_READ2_B32 $vgpr2, 0, 1, 0, implicit $m0, implicit $exec
S_WAITCNT 0
@@ -136,6 +225,24 @@ body: |
; GFX9-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
; GFX9-NEXT: S_WAITCNT 0
; GFX9-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
+ ;
+ ; GFX12-LABEL: name: test_waitcnt_preexisting_combine_waitcnt
+ ; GFX12: liveins: $vgpr0_vgpr1
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
+ ; GFX12-NEXT: S_WAIT_EXPCNT 0
+ ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
+ ; GFX12-NEXT: S_WAIT_BVHCNT 0
+ ; GFX12-NEXT: S_WAIT_KMCNT 0
+ ; GFX12-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+ ; GFX12-NEXT: S_WAITCNT 0
+ ; GFX12-NEXT: S_WAITCNT 0
+ ; GFX12-NEXT: S_WAITCNT 0
+ ; GFX12-NEXT: S_WAITCNT 0
+ ; GFX12-NEXT: S_WAITCNT 0
+ ; GFX12-NEXT: S_WAITCNT 0
+ ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
+ ; GFX12-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
S_WAITCNT 0
S_WAITCNT 0
@@ -159,6 +266,20 @@ body: |
; GFX9-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
; GFX9-NEXT: S_WAITCNT 112
; GFX9-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
+ ;
+ ; GFX12-LABEL: name: test_waitcnt_preexisting_combine_waitcnt_diff_counters
+ ; GFX12: liveins: $vgpr0_vgpr1
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
+ ; GFX12-NEXT: S_WAIT_EXPCNT 0
+ ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
+ ; GFX12-NEXT: S_WAIT_BVHCNT 0
+ ; GFX12-NEXT: S_WAIT_KMCNT 0
+ ; GFX12-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+ ; GFX12-NEXT: S_WAITCNT 49279
+ ; GFX12-NEXT: S_WAITCNT 3952
+ ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
+ ; GFX12-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
S_WAITCNT 49279
S_WAITCNT 3952
@@ -185,6 +306,23 @@ body: |
; GFX9-NEXT: S_NOP 0
; GFX9-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
; GFX9-NEXT: S_ENDPGM 0
+ ;
+ ; GFX12-LABEL: name: test_waitcnt_preexisting_early_wait
+ ; GFX12: liveins: $vgpr0_vgpr1
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
+ ; GFX12-NEXT: S_WAIT_EXPCNT 0
+ ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
+ ; GFX12-NEXT: S_WAIT_BVHCNT 0
+ ; GFX12-NEXT: S_WAIT_KMCNT 0
+ ; GFX12-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+ ; GFX12-NEXT: S_WAITCNT 0
+ ; GFX12-NEXT: S_NOP 0
+ ; GFX12-NEXT: S_NOP 0
+ ; GFX12-NEXT: S_NOP 0
+ ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
+ ; GFX12-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GFX12-NEXT: S_ENDPGM 0
$vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
S_WAITCNT 0
S_NOP 0
@@ -207,6 +345,18 @@ body: |
; GFX9-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
; GFX9-NEXT: S_WAITCNT 3952
; GFX9-NEXT: KILL $vgpr0
+ ;
+ ; GFX12-LABEL: name: test_waitcnt_preexisting_ignore_kill
+ ; GFX12: liveins: $vgpr0_vgpr1
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
+ ; GFX12-NEXT: S_WAIT_EXPCNT 0
+ ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
+ ; GFX12-NEXT: S_WAIT_BVHCNT 0
+ ; GFX12-NEXT: S_WAIT_KMCNT 0
+ ; GFX12-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+ ; GFX12-NEXT: S_WAITCNT 3952
+ ; GFX12-NEXT: KILL $vgpr0
$vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
S_WAITCNT 3952
KILL $vgpr0
@@ -221,6 +371,15 @@ body: |
; GFX9-LABEL: name: test_waitcnt_preexisting_func_start
; GFX9: S_WAITCNT 0
; GFX9-NEXT: S_ENDPGM 0
+ ;
+ ; GFX12-LABEL: name: test_waitcnt_preexisting_func_start
+ ; GFX12: S_WAIT_LOADCNT_DSCNT 0
+ ; GFX12-NEXT: S_WAIT_EXPCNT 0
+ ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
+ ; GFX12-NEXT: S_WAIT_BVHCNT 0
+ ; GFX12-NEXT: S_WAIT_KMCNT 0
+ ; GFX12-NEXT: S_WAITCNT 0
+ ; GFX12-NEXT: S_ENDPGM 0
S_WAITCNT 0
S_ENDPGM 0
...
@@ -241,6 +400,22 @@ body: |
; GFX9-NEXT: S_WAITCNT 112
; GFX9-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
; GFX9-NEXT: S_ENDPGM 0
+ ;
+ ; GFX12-LABEL: name: test_waitcnt_preexisting_buffer_inv
+ ; GFX12: S_WAIT_LOADCNT_DSCNT 0
+ ; GFX12-NEXT: S_WAIT_EXPCNT 0
+ ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
+ ; GFX12-NEXT: S_WAIT_BVHCNT 0
+ ; GFX12-NEXT: S_WAIT_KMCNT 0
+ ; GFX12-NEXT: $vgpr0_vgpr1 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
+ ; GFX12-NEXT: S_WAITCNT 3952
+ ; GFX12-NEXT: BUFFER_INVL2 implicit $exec
+ ; GFX12-NEXT: S_WAIT_LOADCNT 0
+ ; GFX12-NEXT: BUFFER_WBINVL1_VOL implicit $exec
+ ; GFX12-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+ ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
+ ; GFX12-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GFX12-NEXT: S_ENDPGM 0
$vgpr0_vgpr1 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
S_WAITCNT 3952
BUFFER_INVL2 implicit $exec
More information about the llvm-commits mailing list