[llvm] [AMDGPU] Add GFX12 test coverage for vmcnt flushing in loop headers (PR #105548)

Jay Foad via llvm-commits llvm-commits at lists.llvm.org
Thu Aug 22 03:36:54 PDT 2024


https://github.com/jayfoad updated https://github.com/llvm/llvm-project/pull/105548

From 362517a201365a82554f68ce03fc37c9451407c5 Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Wed, 21 Aug 2024 16:40:20 +0100
Subject: [PATCH] [AMDGPU] Add GFX12 test coverage for vmcnt flushing in loop
 headers

---
 .../CodeGen/AMDGPU/waitcnt-vmcnt-loop.mir     | 129 ++++++++++++++++++
 1 file changed, 129 insertions(+)

diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-vmcnt-loop.mir b/llvm/test/CodeGen/AMDGPU/waitcnt-vmcnt-loop.mir
index 2417becb7c2167..e51174919b8d3a 100644
--- a/llvm/test/CodeGen/AMDGPU/waitcnt-vmcnt-loop.mir
+++ b/llvm/test/CodeGen/AMDGPU/waitcnt-vmcnt-loop.mir
@@ -1,5 +1,6 @@
 # RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass si-insert-waitcnts -o - %s | FileCheck -check-prefix=GFX9 %s
 # RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass si-insert-waitcnts -o - %s | FileCheck -check-prefix=GFX10 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass si-insert-waitcnts -o - %s | FileCheck -check-prefix=GFX12 %s
 
 ---
 
@@ -20,6 +21,13 @@
 # GFX10-LABEL: bb.1:
 # GFX10: S_WAITCNT 16
 # GFX10-LABEL: bb.2:
+
+# GFX12-LABEL: waitcnt_vm_loop
+# GFX12-LABEL: bb.0:
+# GFX12-NOT: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.1:
+# GFX12: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.2:
 name:            waitcnt_vm_loop
 body:             |
   bb.0:
@@ -58,6 +66,13 @@ body:             |
 # GFX10-LABEL: bb.1:
 # GFX10: S_WAITCNT 16
 # GFX10-LABEL: bb.2:
+
+# GFX12-LABEL: waitcnt_vm_loop_noterm
+# GFX12-LABEL: bb.0:
+# GFX12-NOT: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.1:
+# GFX12: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.2:
 name:            waitcnt_vm_loop_noterm
 body:             |
   bb.0:
@@ -129,6 +144,13 @@ body:             |
 # GFX10-LABEL: bb.1:
 # GFX10: S_WAITCNT 16
 # GFX10-LABEL: bb.2:
+
+# GFX12-LABEL: waitcnt_vm_loop_load
+# GFX12-LABEL: bb.0:
+# GFX12-NOT: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.1:
+# GFX12: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.2:
 name:            waitcnt_vm_loop_load
 body:             |
   bb.0:
@@ -170,6 +192,13 @@ body:             |
 # GFX10-LABEL: bb.1:
 # GFX10: S_WAITCNT 16
 # GFX10-LABEL: bb.2:
+
+# GFX12-LABEL: waitcnt_vm_loop_no_store
+# GFX12-LABEL: bb.0:
+# GFX12-NOT: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.1:
+# GFX12: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.2:
 name:            waitcnt_vm_loop_no_store
 body:             |
   bb.0:
@@ -212,6 +241,13 @@ body:             |
 # GFX10-LABEL: bb.1:
 # GFX10-NOT: S_WAITCNT 16
 # GFX10-LABEL: bb.2:
+
+# GFX12-LABEL: waitcnt_vm_loop_no_use
+# GFX12-LABEL: bb.0:
+# GFX12-NOT: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.1:
+# GFX12-NOT: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.2:
 name:            waitcnt_vm_loop_no_use
 body:             |
   bb.0:
@@ -255,6 +291,14 @@ body:             |
 # GFX10-LABEL: bb.1:
 # GFX10-NOT: S_WAITCNT 16
 # GFX10-LABEL: bb.2:
+
+# GFX12-LABEL: waitcnt_vm_loop2
+# GFX12-LABEL: bb.0:
+# GFX12: BUFFER_LOAD_FORMAT_X_IDXEN
+# GFX12: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.1:
+# GFX12-NOT: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.2:
 name:            waitcnt_vm_loop2
 body:             |
   bb.0:
@@ -294,6 +338,14 @@ body:             |
 # GFX10-LABEL: bb.1:
 # GFX10-NOT: S_WAITCNT 16
 # GFX10-LABEL: bb.2:
+
+# GFX12-LABEL: waitcnt_vm_loop2_store
+# GFX12-LABEL: bb.0:
+# GFX12: BUFFER_LOAD_FORMAT_X_IDXEN
+# GFX12: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.1:
+# GFX12-NOT: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.2:
 name:            waitcnt_vm_loop2_store
 body:             |
   bb.0:
@@ -334,6 +386,13 @@ body:             |
 # GFX10-LABEL: bb.1:
 # GFX10: S_WAITCNT 16
 # GFX10-LABEL: bb.2:
+
+# GFX12-LABEL: waitcnt_vm_loop2_use_in_loop
+# GFX12-LABEL: bb.0:
+# GFX12-NOT: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.1:
+# GFX12: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.2:
 name:            waitcnt_vm_loop2_use_in_loop
 body:             |
   bb.0:
@@ -379,6 +438,15 @@ body:             |
 # GFX10-LABEL: bb.2:
 # GFX10-NOT: S_WAITCNT 16
 # GFX10-LABEL: bb.3:
+
+# GFX12-LABEL: waitcnt_vm_loop2_nowait
+# GFX12-LABEL: bb.0:
+# GFX12: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.1:
+# GFX12-NOT: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.2:
+# GFX12-NOT: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.3:
 name:            waitcnt_vm_loop2_nowait
 body:             |
   bb.0:
@@ -427,6 +495,14 @@ body:             |
 # GFX10-LABEL: bb.1:
 # GFX10-NOT: S_WAITCNT 16
 # GFX10-LABEL: bb.2:
+
+# GFX12-LABEL: waitcnt_vm_loop2_reginterval
+# GFX12-LABEL: bb.0:
+# GFX12: GLOBAL_LOAD_DWORDX4
+# GFX12: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.1:
+# GFX12-NOT: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.2:
 name:            waitcnt_vm_loop2_reginterval
 body:             |
   bb.0:
@@ -467,6 +543,13 @@ body:             |
 # GFX10-LABEL: bb.1:
 # GFX10: S_WAITCNT 16
 # GFX10-LABEL: bb.2:
+
+# GFX12-LABEL: waitcnt_vm_loop2_reginterval2
+# GFX12-LABEL: bb.0:
+# GFX12-NOT: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.1:
+# GFX12: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.2:
 name:            waitcnt_vm_loop2_reginterval2
 body:             |
   bb.0:
@@ -513,6 +596,15 @@ body:             |
 # GFX10-NOT: S_WAITCNT 16240
 # GFX10-LABEL: bb.2:
 
+# GFX12-LABEL: waitcnt_vm_zero
+# GFX12-LABEL: bb.0:
+# GFX12: BUFFER_LOAD_FORMAT_X_IDXEN
+# GFX12: BUFFER_LOAD_FORMAT_X_IDXEN
+# GFX12: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.1:
+# GFX12-NOT: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.2:
+
 name:            waitcnt_vm_zero
 body:             |
   bb.0:
@@ -548,6 +640,14 @@ body:             |
 # GFX10-LABEL: bb.1:
 # GFX10-NOT: S_WAITCNT
 
+# GFX12-LABEL: waitcnt_vm_necessary
+# GFX12-LABEL: bb.0:
+# GFX12: S_WAIT_LOADCNT 0
+# GFX12: $vgpr4
+# GFX12-NOT: S_WAITCNT
+# GFX12-LABEL: bb.1:
+# GFX12-NOT: S_WAITCNT
+
 # GFX9-LABEL: waitcnt_vm_necessary
 # GFX9-LABEL: bb.0:
 # GFX9: S_WAITCNT 3952
@@ -590,6 +690,13 @@ body:             |
 # GFX10: S_WAITCNT 16
 # GFX10-LABEL: bb.2:
 
+# GFX12-LABEL: waitcnt_vm_loop_global_mem
+# GFX12-LABEL: bb.0:
+# GFX12-NOT: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.1:
+# GFX12: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.2:
+
 name:            waitcnt_vm_loop_global_mem
 body:             |
   bb.0:
@@ -631,6 +738,13 @@ body:             |
 # GFX10: S_WAITCNT 16
 # GFX10-LABEL: bb.2:
 
+# GFX12-LABEL: waitcnt_vm_loop_scratch_mem
+# GFX12-LABEL: bb.0:
+# GFX12-NOT: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.1:
+# GFX12: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.2:
+
 name:            waitcnt_vm_loop_scratch_mem
 body:             |
   bb.0:
@@ -671,6 +785,14 @@ body:             |
 # GFX10-LABEL: bb.1:
 # GFX10: S_WAITCNT 11
 # GFX10-LABEL: bb.2:
+
+# GFX12-LABEL: waitcnt_vm_loop_flat_mem
+# GFX12-LABEL: bb.0:
+# GFX12: FLAT_LOAD_DWORD
+# GFX12-NOT: S_WAIT_LOADCNT_DSCNT 0
+# GFX12-LABEL: bb.1:
+# GFX12: S_WAIT_LOADCNT_DSCNT 0
+# GFX12-LABEL: bb.2:
 name:            waitcnt_vm_loop_flat_mem
 body:             |
   bb.0:
@@ -713,6 +835,13 @@ body:             |
 # GFX10-LABEL: bb.1:
 # GFX10: S_WAITCNT 16
 # GFX10-LABEL: bb.2:
+
+# GFX12-LABEL: waitcnt_vm_loop_flat_load
+# GFX12-LABEL: bb.0:
+# GFX12-NOT: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.1:
+# GFX12: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.2:
 name:            waitcnt_vm_loop_flat_load
 body:             |
   bb.0:



More information about the llvm-commits mailing list