[llvm] [AMDGPU] Add GFX12 test coverage for vmcnt flushing in loop headers (PR #105548)
Jay Foad via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 22 03:36:54 PDT 2024
https://github.com/jayfoad updated https://github.com/llvm/llvm-project/pull/105548
>From 362517a201365a82554f68ce03fc37c9451407c5 Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Wed, 21 Aug 2024 16:40:20 +0100
Subject: [PATCH] [AMDGPU] Add GFX12 test coverage for vmcnt flushing in loop headers
---
.../CodeGen/AMDGPU/waitcnt-vmcnt-loop.mir | 129 ++++++++++++++++++
1 file changed, 129 insertions(+)
diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-vmcnt-loop.mir b/llvm/test/CodeGen/AMDGPU/waitcnt-vmcnt-loop.mir
index 2417becb7c2167..e51174919b8d3a 100644
--- a/llvm/test/CodeGen/AMDGPU/waitcnt-vmcnt-loop.mir
+++ b/llvm/test/CodeGen/AMDGPU/waitcnt-vmcnt-loop.mir
@@ -1,5 +1,6 @@
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass si-insert-waitcnts -o - %s | FileCheck -check-prefix=GFX9 %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass si-insert-waitcnts -o - %s | FileCheck -check-prefix=GFX10 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass si-insert-waitcnts -o - %s | FileCheck -check-prefix=GFX12 %s
---
@@ -20,6 +21,13 @@
# GFX10-LABEL: bb.1:
# GFX10: S_WAITCNT 16
# GFX10-LABEL: bb.2:
+
+# GFX12-LABEL: waitcnt_vm_loop
+# GFX12-LABEL: bb.0:
+# GFX12-NOT: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.1:
+# GFX12: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.2:
name: waitcnt_vm_loop
body: |
bb.0:
@@ -58,6 +66,13 @@ body: |
# GFX10-LABEL: bb.1:
# GFX10: S_WAITCNT 16
# GFX10-LABEL: bb.2:
+
+# GFX12-LABEL: waitcnt_vm_loop_noterm
+# GFX12-LABEL: bb.0:
+# GFX12-NOT: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.1:
+# GFX12: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.2:
name: waitcnt_vm_loop_noterm
body: |
bb.0:
@@ -129,6 +144,13 @@ body: |
# GFX10-LABEL: bb.1:
# GFX10: S_WAITCNT 16
# GFX10-LABEL: bb.2:
+
+# GFX12-LABEL: waitcnt_vm_loop_load
+# GFX12-LABEL: bb.0:
+# GFX12-NOT: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.1:
+# GFX12: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.2:
name: waitcnt_vm_loop_load
body: |
bb.0:
@@ -170,6 +192,13 @@ body: |
# GFX10-LABEL: bb.1:
# GFX10: S_WAITCNT 16
# GFX10-LABEL: bb.2:
+
+# GFX12-LABEL: waitcnt_vm_loop_no_store
+# GFX12-LABEL: bb.0:
+# GFX12-NOT: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.1:
+# GFX12: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.2:
name: waitcnt_vm_loop_no_store
body: |
bb.0:
@@ -212,6 +241,13 @@ body: |
# GFX10-LABEL: bb.1:
# GFX10-NOT: S_WAITCNT 16
# GFX10-LABEL: bb.2:
+
+# GFX12-LABEL: waitcnt_vm_loop_no_use
+# GFX12-LABEL: bb.0:
+# GFX12-NOT: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.1:
+# GFX12-NOT: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.2:
name: waitcnt_vm_loop_no_use
body: |
bb.0:
@@ -255,6 +291,14 @@ body: |
# GFX10-LABEL: bb.1:
# GFX10-NOT: S_WAITCNT 16
# GFX10-LABEL: bb.2:
+
+# GFX12-LABEL: waitcnt_vm_loop2
+# GFX12-LABEL: bb.0:
+# GFX12: BUFFER_LOAD_FORMAT_X_IDXEN
+# GFX12: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.1:
+# GFX12-NOT: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.2:
name: waitcnt_vm_loop2
body: |
bb.0:
@@ -294,6 +338,14 @@ body: |
# GFX10-LABEL: bb.1:
# GFX10-NOT: S_WAITCNT 16
# GFX10-LABEL: bb.2:
+
+# GFX12-LABEL: waitcnt_vm_loop2_store
+# GFX12-LABEL: bb.0:
+# GFX12: BUFFER_LOAD_FORMAT_X_IDXEN
+# GFX12: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.1:
+# GFX12-NOT: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.2:
name: waitcnt_vm_loop2_store
body: |
bb.0:
@@ -334,6 +386,13 @@ body: |
# GFX10-LABEL: bb.1:
# GFX10: S_WAITCNT 16
# GFX10-LABEL: bb.2:
+
+# GFX12-LABEL: waitcnt_vm_loop2_use_in_loop
+# GFX12-LABEL: bb.0:
+# GFX12-NOT: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.1:
+# GFX12: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.2:
name: waitcnt_vm_loop2_use_in_loop
body: |
bb.0:
@@ -379,6 +438,15 @@ body: |
# GFX10-LABEL: bb.2:
# GFX10-NOT: S_WAITCNT 16
# GFX10-LABEL: bb.3:
+
+# GFX12-LABEL: waitcnt_vm_loop2_nowait
+# GFX12-LABEL: bb.0:
+# GFX12: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.1:
+# GFX12-NOT: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.2:
+# GFX12-NOT: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.3:
name: waitcnt_vm_loop2_nowait
body: |
bb.0:
@@ -427,6 +495,14 @@ body: |
# GFX10-LABEL: bb.1:
# GFX10-NOT: S_WAITCNT 16
# GFX10-LABEL: bb.2:
+
+# GFX12-LABEL: waitcnt_vm_loop2_reginterval
+# GFX12-LABEL: bb.0:
+# GFX12: GLOBAL_LOAD_DWORDX4
+# GFX12: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.1:
+# GFX12-NOT: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.2:
name: waitcnt_vm_loop2_reginterval
body: |
bb.0:
@@ -467,6 +543,13 @@ body: |
# GFX10-LABEL: bb.1:
# GFX10: S_WAITCNT 16
# GFX10-LABEL: bb.2:
+
+# GFX12-LABEL: waitcnt_vm_loop2_reginterval2
+# GFX12-LABEL: bb.0:
+# GFX12-NOT: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.1:
+# GFX12: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.2:
name: waitcnt_vm_loop2_reginterval2
body: |
bb.0:
@@ -513,6 +596,15 @@ body: |
# GFX10-NOT: S_WAITCNT 16240
# GFX10-LABEL: bb.2:
+# GFX12-LABEL: waitcnt_vm_zero
+# GFX12-LABEL: bb.0:
+# GFX12: BUFFER_LOAD_FORMAT_X_IDXEN
+# GFX12: BUFFER_LOAD_FORMAT_X_IDXEN
+# GFX12: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.1:
+# GFX12-NOT: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.2:
+
name: waitcnt_vm_zero
body: |
bb.0:
@@ -548,6 +640,14 @@ body: |
# GFX10-LABEL: bb.1:
# GFX10-NOT: S_WAITCNT
+# GFX12-LABEL: waitcnt_vm_necessary
+# GFX12-LABEL: bb.0:
+# GFX12: S_WAIT_LOADCNT 0
+# GFX12: $vgpr4
+# GFX12-NOT: S_WAIT_LOADCNT
+# GFX12-LABEL: bb.1:
+# GFX12-NOT: S_WAIT_LOADCNT
+
# GFX9-LABEL: waitcnt_vm_necessary
# GFX9-LABEL: bb.0:
# GFX9: S_WAITCNT 3952
@@ -590,6 +690,13 @@ body: |
# GFX10: S_WAITCNT 16
# GFX10-LABEL: bb.2:
+# GFX12-LABEL: waitcnt_vm_loop_global_mem
+# GFX12-LABEL: bb.0:
+# GFX12-NOT: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.1:
+# GFX12: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.2:
+
name: waitcnt_vm_loop_global_mem
body: |
bb.0:
@@ -631,6 +738,13 @@ body: |
# GFX10: S_WAITCNT 16
# GFX10-LABEL: bb.2:
+# GFX12-LABEL: waitcnt_vm_loop_scratch_mem
+# GFX12-LABEL: bb.0:
+# GFX12-NOT: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.1:
+# GFX12: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.2:
+
name: waitcnt_vm_loop_scratch_mem
body: |
bb.0:
@@ -671,6 +785,14 @@ body: |
# GFX10-LABEL: bb.1:
# GFX10: S_WAITCNT 11
# GFX10-LABEL: bb.2:
+
+# GFX12-LABEL: waitcnt_vm_loop_flat_mem
+# GFX12-LABEL: bb.0:
+# GFX12: FLAT_LOAD_DWORD
+# GFX12-NOT: S_WAIT_LOADCNT_DSCNT 0
+# GFX12-LABEL: bb.1:
+# GFX12: S_WAIT_LOADCNT_DSCNT 0
+# GFX12-LABEL: bb.2:
name: waitcnt_vm_loop_flat_mem
body: |
bb.0:
@@ -713,6 +835,13 @@ body: |
# GFX10-LABEL: bb.1:
# GFX10: S_WAITCNT 16
# GFX10-LABEL: bb.2:
+
+# GFX12-LABEL: waitcnt_vm_loop_flat_load
+# GFX12-LABEL: bb.0:
+# GFX12-NOT: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.1:
+# GFX12: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.2:
name: waitcnt_vm_loop_flat_load
body: |
bb.0:
More information about the llvm-commits
mailing list