[llvm] ec1f28d - AMDGPU/gfx12: avoid crashing on legacy waitcnt intrinsics (#92306)

via llvm-commits llvm-commits at lists.llvm.org
Wed May 15 13:23:23 PDT 2024


Author: Nicolai Hähnle
Date: 2024-05-15T22:23:18+02:00
New Revision: ec1f28dc97ce22ba5b3e6f95ff84414dfbda46b0

URL: https://github.com/llvm/llvm-project/commit/ec1f28dc97ce22ba5b3e6f95ff84414dfbda46b0
DIFF: https://github.com/llvm/llvm-project/commit/ec1f28dc97ce22ba5b3e6f95ff84414dfbda46b0.diff

LOG: AMDGPU/gfx12: avoid crashing on legacy waitcnt intrinsics (#92306)

They *are* still accepted by the HW but have a conservative effect.

Leave them untouched since handling them would complicate the logic a
bit, and developers who code to such a low level really need to revisit
what they're doing anyway.

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
    llvm/test/CodeGen/AMDGPU/waitcnt-preexisting.mir

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 839ac927a0ee4..5577ce9eb1282 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -1364,6 +1364,11 @@ bool WaitcntGeneratorGFX12Plus::applyPreexistingWaitcnt(
     unsigned Opcode = SIInstrInfo::getNonSoftWaitcntOpcode(II.getOpcode());
     bool TrySimplify = Opcode != II.getOpcode() && !OptNone;
 
+    // Don't crash if the programmer used legacy waitcnt intrinsics, but don't
+    // attempt to do more than that either.
+    if (Opcode == AMDGPU::S_WAITCNT)
+      continue;
+
     if (Opcode == AMDGPU::S_WAIT_LOADCNT_DSCNT) {
       unsigned OldEnc =
           TII->getNamedOperand(II, AMDGPU::OpName::simm16)->getImm();

diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-preexisting.mir b/llvm/test/CodeGen/AMDGPU/waitcnt-preexisting.mir
index 4c01786e45f55..e15814210dfd9 100644
--- a/llvm/test/CodeGen/AMDGPU/waitcnt-preexisting.mir
+++ b/llvm/test/CodeGen/AMDGPU/waitcnt-preexisting.mir
@@ -1,5 +1,12 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs -run-pass si-insert-waitcnts -o - %s | FileCheck -check-prefixes=GFX9 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass si-insert-waitcnts -o - %s | FileCheck -check-prefixes=GFX12 %s
+
+# For gfx12+, this test simply ensures that we don't crash in the face of manually
+# inserted waitcnt intrinsics. They are still allowed for compatibility, but
+# their effect in the HW is very conservative and code generation does not attempt
+# to do anything with them. Developers who write code at such a low level should
+# revisit their code for gfx12+ anyway.
 
 ---
 name:            test_waitcnt_preexisting_lgkmcnt_unmodified
@@ -17,6 +24,22 @@ body:             |
     ; GFX9-NEXT: S_WAITCNT 112
     ; GFX9-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
     ; GFX9-NEXT: S_ENDPGM 0
+    ;
+    ; GFX12-LABEL: name: test_waitcnt_preexisting_lgkmcnt_unmodified
+    ; GFX12: liveins: $vgpr0
+    ; GFX12-NEXT: {{  $}}
+    ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
+    ; GFX12-NEXT: S_WAIT_EXPCNT 0
+    ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
+    ; GFX12-NEXT: S_WAIT_BVHCNT 0
+    ; GFX12-NEXT: S_WAIT_KMCNT 0
+    ; GFX12-NEXT: $vgpr0_vgpr1 = DS_READ2_B32 $vgpr0, 0, 1, 0, implicit $m0, implicit $exec
+    ; GFX12-NEXT: S_WAITCNT 49279
+    ; GFX12-NEXT: S_WAIT_DSCNT 0
+    ; GFX12-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
+    ; GFX12-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX12-NEXT: S_ENDPGM 0
     $vgpr0_vgpr1 = DS_READ2_B32 $vgpr0, 0, 1, 0, implicit $m0, implicit $exec
     S_WAITCNT 49279
     $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
@@ -40,6 +63,22 @@ body:             |
     ; GFX9-NEXT: S_WAITCNT 112
     ; GFX9-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
     ; GFX9-NEXT: S_ENDPGM 0
+    ;
+    ; GFX12-LABEL: name: test_waitcnt_preexisting_vmcnt_unmodified
+    ; GFX12: liveins: $vgpr0_vgpr1
+    ; GFX12-NEXT: {{  $}}
+    ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
+    ; GFX12-NEXT: S_WAIT_EXPCNT 0
+    ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
+    ; GFX12-NEXT: S_WAIT_BVHCNT 0
+    ; GFX12-NEXT: S_WAIT_KMCNT 0
+    ; GFX12-NEXT: $vgpr0_vgpr1 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
+    ; GFX12-NEXT: S_WAITCNT 3952
+    ; GFX12-NEXT: S_WAIT_LOADCNT 0
+    ; GFX12-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
+    ; GFX12-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX12-NEXT: S_ENDPGM 0
     $vgpr0_vgpr1 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
     S_WAITCNT 3952
     $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
@@ -65,6 +104,22 @@ body:             |
     ; GFX9-NEXT: S_WAITCNT 112
     ; GFX9-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
     ; GFX9-NEXT: S_ENDPGM 0
+    ;
+    ; GFX12-LABEL: name: test_waitcnt_preexisting_vmcnt_needs_lgkmcnt
+    ; GFX12: liveins: $vgpr0
+    ; GFX12-NEXT: {{  $}}
+    ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
+    ; GFX12-NEXT: S_WAIT_EXPCNT 0
+    ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
+    ; GFX12-NEXT: S_WAIT_BVHCNT 0
+    ; GFX12-NEXT: S_WAIT_KMCNT 0
+    ; GFX12-NEXT: $vgpr0_vgpr1 = DS_READ2_B32 $vgpr0, 0, 1, 0, implicit $m0, implicit $exec
+    ; GFX12-NEXT: S_WAITCNT 3952
+    ; GFX12-NEXT: S_WAIT_DSCNT 0
+    ; GFX12-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
+    ; GFX12-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX12-NEXT: S_ENDPGM 0
     $vgpr0_vgpr1 = DS_READ2_B32 $vgpr0, 0, 1, 0, implicit $m0, implicit $exec
     S_WAITCNT 3952
     $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
@@ -88,6 +143,22 @@ body:             |
     ; GFX9-NEXT: S_WAITCNT 112
     ; GFX9-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
     ; GFX9-NEXT: S_ENDPGM 0
+    ;
+    ; GFX12-LABEL: name: test_waitcnt_preexisting_lgkmcnt_needs_vmcnt
+    ; GFX12: liveins: $vgpr0_vgpr1
+    ; GFX12-NEXT: {{  $}}
+    ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
+    ; GFX12-NEXT: S_WAIT_EXPCNT 0
+    ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
+    ; GFX12-NEXT: S_WAIT_BVHCNT 0
+    ; GFX12-NEXT: S_WAIT_KMCNT 0
+    ; GFX12-NEXT: $vgpr0_vgpr1 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
+    ; GFX12-NEXT: S_WAITCNT 49279
+    ; GFX12-NEXT: S_WAIT_LOADCNT 0
+    ; GFX12-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
+    ; GFX12-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX12-NEXT: S_ENDPGM 0
     $vgpr0_vgpr1 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
     S_WAITCNT 49279
     $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
@@ -115,6 +186,24 @@ body:             |
     ; GFX9-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
     ; GFX9-NEXT: S_WAITCNT 112
     ; GFX9-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
+    ;
+    ; GFX12-LABEL: name: test_waitcnt_preexisting_apply_all_counters
+    ; GFX12: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX12-NEXT: {{  $}}
+    ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
+    ; GFX12-NEXT: S_WAIT_EXPCNT 0
+    ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
+    ; GFX12-NEXT: S_WAIT_BVHCNT 0
+    ; GFX12-NEXT: S_WAIT_KMCNT 0
+    ; GFX12-NEXT: $vgpr4_vgpr5 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
+    ; GFX12-NEXT: $vgpr6_vgpr7 = DS_READ2_B32 $vgpr2, 0, 1, 0, implicit $m0, implicit $exec
+    ; GFX12-NEXT: S_WAITCNT 0
+    ; GFX12-NEXT: S_WAIT_DSCNT 0
+    ; GFX12-NEXT: $vgpr6 = V_OR_B32_e32 1, killed $vgpr6, implicit $exec
+    ; GFX12-NEXT: S_WAIT_LOADCNT 0
+    ; GFX12-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
+    ; GFX12-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
     $vgpr4_vgpr5 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
     $vgpr6_vgpr7 = DS_READ2_B32 $vgpr2, 0, 1, 0, implicit $m0, implicit $exec
     S_WAITCNT 0
@@ -136,6 +225,24 @@ body:             |
     ; GFX9-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
     ; GFX9-NEXT: S_WAITCNT 0
     ; GFX9-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
+    ;
+    ; GFX12-LABEL: name: test_waitcnt_preexisting_combine_waitcnt
+    ; GFX12: liveins: $vgpr0_vgpr1
+    ; GFX12-NEXT: {{  $}}
+    ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
+    ; GFX12-NEXT: S_WAIT_EXPCNT 0
+    ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
+    ; GFX12-NEXT: S_WAIT_BVHCNT 0
+    ; GFX12-NEXT: S_WAIT_KMCNT 0
+    ; GFX12-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX12-NEXT: S_WAITCNT 0
+    ; GFX12-NEXT: S_WAITCNT 0
+    ; GFX12-NEXT: S_WAITCNT 0
+    ; GFX12-NEXT: S_WAITCNT 0
+    ; GFX12-NEXT: S_WAITCNT 0
+    ; GFX12-NEXT: S_WAITCNT 0
+    ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
+    ; GFX12-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
     $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
     S_WAITCNT 0
     S_WAITCNT 0
@@ -159,6 +266,20 @@ body:             |
     ; GFX9-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
     ; GFX9-NEXT: S_WAITCNT 112
     ; GFX9-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
+    ;
+    ; GFX12-LABEL: name: test_waitcnt_preexisting_combine_waitcnt_diff_counters
+    ; GFX12: liveins: $vgpr0_vgpr1
+    ; GFX12-NEXT: {{  $}}
+    ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
+    ; GFX12-NEXT: S_WAIT_EXPCNT 0
+    ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
+    ; GFX12-NEXT: S_WAIT_BVHCNT 0
+    ; GFX12-NEXT: S_WAIT_KMCNT 0
+    ; GFX12-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX12-NEXT: S_WAITCNT 49279
+    ; GFX12-NEXT: S_WAITCNT 3952
+    ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
+    ; GFX12-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
     $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
     S_WAITCNT 49279
     S_WAITCNT 3952
@@ -185,6 +306,23 @@ body:             |
     ; GFX9-NEXT: S_NOP 0
     ; GFX9-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
     ; GFX9-NEXT: S_ENDPGM 0
+    ;
+    ; GFX12-LABEL: name: test_waitcnt_preexisting_early_wait
+    ; GFX12: liveins: $vgpr0_vgpr1
+    ; GFX12-NEXT: {{  $}}
+    ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
+    ; GFX12-NEXT: S_WAIT_EXPCNT 0
+    ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
+    ; GFX12-NEXT: S_WAIT_BVHCNT 0
+    ; GFX12-NEXT: S_WAIT_KMCNT 0
+    ; GFX12-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX12-NEXT: S_WAITCNT 0
+    ; GFX12-NEXT: S_NOP 0
+    ; GFX12-NEXT: S_NOP 0
+    ; GFX12-NEXT: S_NOP 0
+    ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
+    ; GFX12-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX12-NEXT: S_ENDPGM 0
     $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
     S_WAITCNT 0
     S_NOP 0
@@ -207,6 +345,18 @@ body:             |
     ; GFX9-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
     ; GFX9-NEXT: S_WAITCNT 3952
     ; GFX9-NEXT: KILL $vgpr0
+    ;
+    ; GFX12-LABEL: name: test_waitcnt_preexisting_ignore_kill
+    ; GFX12: liveins: $vgpr0_vgpr1
+    ; GFX12-NEXT: {{  $}}
+    ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
+    ; GFX12-NEXT: S_WAIT_EXPCNT 0
+    ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
+    ; GFX12-NEXT: S_WAIT_BVHCNT 0
+    ; GFX12-NEXT: S_WAIT_KMCNT 0
+    ; GFX12-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX12-NEXT: S_WAITCNT 3952
+    ; GFX12-NEXT: KILL $vgpr0
     $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
     S_WAITCNT 3952
     KILL $vgpr0
@@ -221,6 +371,15 @@ body:             |
     ; GFX9-LABEL: name: test_waitcnt_preexisting_func_start
     ; GFX9: S_WAITCNT 0
     ; GFX9-NEXT: S_ENDPGM 0
+    ;
+    ; GFX12-LABEL: name: test_waitcnt_preexisting_func_start
+    ; GFX12: S_WAIT_LOADCNT_DSCNT 0
+    ; GFX12-NEXT: S_WAIT_EXPCNT 0
+    ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
+    ; GFX12-NEXT: S_WAIT_BVHCNT 0
+    ; GFX12-NEXT: S_WAIT_KMCNT 0
+    ; GFX12-NEXT: S_WAITCNT 0
+    ; GFX12-NEXT: S_ENDPGM 0
     S_WAITCNT 0
     S_ENDPGM 0
 ...
@@ -241,6 +400,22 @@ body:             |
     ; GFX9-NEXT: S_WAITCNT 112
     ; GFX9-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
     ; GFX9-NEXT: S_ENDPGM 0
+    ;
+    ; GFX12-LABEL: name: test_waitcnt_preexisting_buffer_inv
+    ; GFX12: S_WAIT_LOADCNT_DSCNT 0
+    ; GFX12-NEXT: S_WAIT_EXPCNT 0
+    ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
+    ; GFX12-NEXT: S_WAIT_BVHCNT 0
+    ; GFX12-NEXT: S_WAIT_KMCNT 0
+    ; GFX12-NEXT: $vgpr0_vgpr1 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
+    ; GFX12-NEXT: S_WAITCNT 3952
+    ; GFX12-NEXT: BUFFER_INVL2 implicit $exec
+    ; GFX12-NEXT: S_WAIT_LOADCNT 0
+    ; GFX12-NEXT: BUFFER_WBINVL1_VOL implicit $exec
+    ; GFX12-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
+    ; GFX12-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX12-NEXT: S_ENDPGM 0
     $vgpr0_vgpr1 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
     S_WAITCNT 3952
     BUFFER_INVL2 implicit $exec


        


More information about the llvm-commits mailing list