[llvm] [AMDGPU] Fix setting nontemporal in memory legalizer (PR #83815)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 4 02:10:52 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Mirko Brkušanin (mbrkusanin)
<details>
<summary>Changes</summary>
The iterator MI can be advanced by insertWait(), but we need the original
instruction to set the temporal hint. Move the non-temporal handling before the
volatile handling.
---
Patch is 1.84 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/83815.diff
24 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp (+5-5)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll (+352)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll (+2102)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-nontemporal.ll (+267)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-singlethread.ll (+1866)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll (+2102)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-volatile.ll (+156)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-wavefront.ll (+1844)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-workgroup.ll (+1918)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-global-agent.ll (+2216)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-global-nontemporal.ll (+256)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-global-singlethread.ll (+2170)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-global-system.ll (+2128)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-global-volatile.ll (+151)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-global-wavefront.ll (+2170)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-global-workgroup.ll (+2206)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-local-agent.ll (+1836)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-local-nontemporal.ll (+281)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-local-singlethread.ll (+1770)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-local-system.ll (+1836)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-local-volatile.ll (+139)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-local-wavefront.ll (+1770)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-local-workgroup.ll (+1836)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-private-nontemporal.ll (+203)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
index 4069a368f68719..73c23f0f987c3d 100644
--- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -2392,6 +2392,11 @@ bool SIGfx12CacheControl::enableVolatileAndOrNonTemporal(
bool Changed = false;
+ if (IsNonTemporal) {
+ // Set non-temporal hint for all cache levels.
+ Changed |= setTH(MI, AMDGPU::CPol::TH_NT);
+ }
+
if (IsVolatile) {
Changed |= setScope(MI, AMDGPU::CPol::SCOPE_SYS);
@@ -2407,11 +2412,6 @@ bool SIGfx12CacheControl::enableVolatileAndOrNonTemporal(
Position::AFTER);
}
- if (IsNonTemporal) {
- // Set non-temporal hint for all cache levels.
- Changed |= setTH(MI, AMDGPU::CPol::TH_NT);
- }
-
return Changed;
}
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll
index 77962fadcacfc6..e13542f61474e2 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll
@@ -10,6 +10,8 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX940-TGSPLIT %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GFX11-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s
define amdgpu_kernel void @singlethread_acquire_fence() {
; GFX6-LABEL: singlethread_acquire_fence:
@@ -55,6 +57,14 @@ define amdgpu_kernel void @singlethread_acquire_fence() {
; GFX11-CU-LABEL: singlethread_acquire_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: singlethread_acquire_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: singlethread_acquire_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("singlethread") acquire
ret void
@@ -104,6 +114,14 @@ define amdgpu_kernel void @singlethread_release_fence() {
; GFX11-CU-LABEL: singlethread_release_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: singlethread_release_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: singlethread_release_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("singlethread") release
ret void
@@ -153,6 +171,14 @@ define amdgpu_kernel void @singlethread_acq_rel_fence() {
; GFX11-CU-LABEL: singlethread_acq_rel_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: singlethread_acq_rel_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: singlethread_acq_rel_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("singlethread") acq_rel
ret void
@@ -202,6 +228,14 @@ define amdgpu_kernel void @singlethread_seq_cst_fence() {
; GFX11-CU-LABEL: singlethread_seq_cst_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: singlethread_seq_cst_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: singlethread_seq_cst_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("singlethread") seq_cst
ret void
@@ -251,6 +285,14 @@ define amdgpu_kernel void @singlethread_one_as_acquire_fence() {
; GFX11-CU-LABEL: singlethread_one_as_acquire_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: singlethread_one_as_acquire_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: singlethread_one_as_acquire_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("singlethread-one-as") acquire
ret void
@@ -300,6 +342,14 @@ define amdgpu_kernel void @singlethread_one_as_release_fence() {
; GFX11-CU-LABEL: singlethread_one_as_release_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: singlethread_one_as_release_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: singlethread_one_as_release_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("singlethread-one-as") release
ret void
@@ -349,6 +399,14 @@ define amdgpu_kernel void @singlethread_one_as_acq_rel_fence() {
; GFX11-CU-LABEL: singlethread_one_as_acq_rel_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: singlethread_one_as_acq_rel_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: singlethread_one_as_acq_rel_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("singlethread-one-as") acq_rel
ret void
@@ -398,6 +456,14 @@ define amdgpu_kernel void @singlethread_one_as_seq_cst_fence() {
; GFX11-CU-LABEL: singlethread_one_as_seq_cst_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: singlethread_one_as_seq_cst_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: singlethread_one_as_seq_cst_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("singlethread-one-as") seq_cst
ret void
@@ -447,6 +513,14 @@ define amdgpu_kernel void @wavefront_acquire_fence() {
; GFX11-CU-LABEL: wavefront_acquire_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: wavefront_acquire_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: wavefront_acquire_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("wavefront") acquire
ret void
@@ -496,6 +570,14 @@ define amdgpu_kernel void @wavefront_release_fence() {
; GFX11-CU-LABEL: wavefront_release_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: wavefront_release_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: wavefront_release_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("wavefront") release
ret void
@@ -545,6 +627,14 @@ define amdgpu_kernel void @wavefront_acq_rel_fence() {
; GFX11-CU-LABEL: wavefront_acq_rel_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: wavefront_acq_rel_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: wavefront_acq_rel_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("wavefront") acq_rel
ret void
@@ -594,6 +684,14 @@ define amdgpu_kernel void @wavefront_seq_cst_fence() {
; GFX11-CU-LABEL: wavefront_seq_cst_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: wavefront_seq_cst_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: wavefront_seq_cst_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("wavefront") seq_cst
ret void
@@ -643,6 +741,14 @@ define amdgpu_kernel void @wavefront_one_as_acquire_fence() {
; GFX11-CU-LABEL: wavefront_one_as_acquire_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: wavefront_one_as_acquire_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: wavefront_one_as_acquire_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("wavefront-one-as") acquire
ret void
@@ -692,6 +798,14 @@ define amdgpu_kernel void @wavefront_one_as_release_fence() {
; GFX11-CU-LABEL: wavefront_one_as_release_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: wavefront_one_as_release_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: wavefront_one_as_release_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("wavefront-one-as") release
ret void
@@ -741,6 +855,14 @@ define amdgpu_kernel void @wavefront_one_as_acq_rel_fence() {
; GFX11-CU-LABEL: wavefront_one_as_acq_rel_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: wavefront_one_as_acq_rel_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: wavefront_one_as_acq_rel_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("wavefront-one-as") acq_rel
ret void
@@ -790,6 +912,14 @@ define amdgpu_kernel void @wavefront_one_as_seq_cst_fence() {
; GFX11-CU-LABEL: wavefront_one_as_seq_cst_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: wavefront_one_as_seq_cst_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: wavefront_one_as_seq_cst_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("wavefront-one-as") seq_cst
ret void
@@ -843,6 +973,15 @@ define amdgpu_kernel void @workgroup_acquire_fence() {
; GFX11-CU-LABEL: workgroup_acquire_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: workgroup_acquire_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: workgroup_acquire_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("workgroup") acquire
ret void
@@ -892,6 +1031,14 @@ define amdgpu_kernel void @workgroup_release_fence() {
; GFX11-CU-LABEL: workgroup_release_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: workgroup_release_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: workgroup_release_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("workgroup") release
ret void
@@ -945,6 +1092,15 @@ define amdgpu_kernel void @workgroup_acq_rel_fence() {
; GFX11-CU-LABEL: workgroup_acq_rel_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: workgroup_acq_rel_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: workgroup_acq_rel_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("workgroup") acq_rel
ret void
@@ -998,6 +1154,15 @@ define amdgpu_kernel void @workgroup_seq_cst_fence() {
; GFX11-CU-LABEL: workgroup_seq_cst_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: workgroup_seq_cst_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: workgroup_seq_cst_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("workgroup") seq_cst
ret void
@@ -1051,6 +1216,15 @@ define amdgpu_kernel void @workgroup_one_as_acquire_fence() {
; GFX11-CU-LABEL: workgroup_one_as_acquire_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: workgroup_one_as_acquire_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: workgroup_one_as_acquire_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("workgroup-one-as") acquire
ret void
@@ -1100,6 +1274,14 @@ define amdgpu_kernel void @workgroup_one_as_release_fence() {
; GFX11-CU-LABEL: workgroup_one_as_release_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: workgroup_one_as_release_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: workgroup_one_as_release_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("workgroup-one-as") release
ret void
@@ -1153,6 +1335,15 @@ define amdgpu_kernel void @workgroup_one_as_acq_rel_fence() {
; GFX11-CU-LABEL: workgroup_one_as_acq_rel_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: workgroup_one_as_acq_rel_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: workgroup_one_as_acq_rel_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("workgroup-one-as") acq_rel
ret void
@@ -1206,6 +1397,15 @@ define amdgpu_kernel void @workgroup_one_as_seq_cst_fence() {
; GFX11-CU-LABEL: workgroup_one_as_seq_cst_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: workgroup_one_as_seq_cst_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: workgroup_one_as_seq_cst_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("workgroup-one-as") seq_cst
ret void
@@ -1269,6 +1469,16 @@ define amdgpu_kernel void @agent_acquire_fence() {
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: agent_acquire_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: agent_acquire_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("agent") acquire
ret void
@@ -1320,6 +1530,14 @@ define amdgpu_kernel void @agent_release_fence() {
; GFX11-CU-LABEL: agent_release_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: agent_release_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: agent_release_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("agent") release
ret void
@@ -1385,6 +1603,16 @@ define amdgpu_kernel void @agent_acq_rel_fence() {
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: agent_acq_rel_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: agent_acq_rel_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("agent") acq_rel
ret void
@@ -1450,6 +1678,16 @@ define amdgpu_kernel void @agent_seq_cst_fence() {
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: agent_seq_cst_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: agent_seq_cst_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("agent") seq_cst
ret void
@@ -1513,6 +1751,16 @@ define amdgpu_kernel void @agent_one_as_acquire_fence() {
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: agent_one_as_acquire_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: agent_one_as_acquire_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("agent-one-as") acquire
ret void
@@ -1564,6 +1812,14 @@ define amdgpu_kernel void @agent_one_as_release_fence() {
; GFX11-CU-LABEL: agent_one_as_release_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: agent_one_as_release_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: agent_one_as_release_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("agent-one-as") release
ret void
@@ -1629,6 +1885,16 @@ define amdgpu_kernel void @agent_one_as_acq_rel_fence() {
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: agent_one_as_acq_rel_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: agent_one_as_acq_rel_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("agent-one-as") acq_rel
ret void
@@ -1694,6 +1960,16 @@ define amdgpu_kernel void @agent_one_as_seq_cst_fence() {
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: agent_one_as_seq_cst_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: agent_one_as_seq_cst_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("agent-one-as") seq_cst
ret void
@@ -1759,6 +2035,16 @@ define amdgpu_kernel void @system_acquire_fence() {
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: system_acquire_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: system_acquire_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX12-CU-NEXT: s_endpgm
entry:
fence acquire
ret void
@@ -1812,6 +2098,14 @@ define amdgpu_kernel void @system_release_fence() {
; GFX11-CU-LABEL: system_release_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: system_release_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: system_release_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence release
ret void
@@ -1881,6 +2175,16 @@ define amdgpu_kernel void @system_acq_rel_fence() {
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: system_acq_rel_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: system_acq_rel_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX12-CU-NEXT: s_endpgm
entry:
fence acq_rel
ret void
@@ -1950,6 +2254,16 @@ define amdgpu_kernel void @system_seq_cst_fence() {
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: system_seq_cst_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: system_seq_cst_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX12-CU-NEXT: s_endpgm
e...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/83815
More information about the llvm-commits mailing list