[llvm] [AMDGPU][gfx1250] Add memory legalizer tests (NFC) (PR #154725)
Pierre van Houtryve via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 22 01:13:12 PDT 2025
https://github.com/Pierre-vh updated https://github.com/llvm/llvm-project/pull/154725
From e030c29267df783c5b28ae4026d8850a7ec894a7 Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Thu, 21 Aug 2025 12:13:54 +0200
Subject: [PATCH 1/2] [AMDGPU][gfx1250] Add memory legalizer tests
---
.../memory-legalizer-fence-mmra-global.ll | 171 ++
.../memory-legalizer-fence-mmra-local.ll | 100 +
.../CodeGen/AMDGPU/memory-legalizer-fence.ll | 239 +++
.../AMDGPU/memory-legalizer-flat-agent.ll | 1651 ++++++++++++++++
.../AMDGPU/memory-legalizer-flat-lastuse.ll | 51 +
.../memory-legalizer-flat-nontemporal.ll | 65 +
.../memory-legalizer-flat-singlethread.ll | 1241 ++++++++++++
.../AMDGPU/memory-legalizer-flat-system.ll | 1709 +++++++++++++++++
.../AMDGPU/memory-legalizer-flat-volatile.ll | 80 +
.../AMDGPU/memory-legalizer-flat-wavefront.ll | 1225 ++++++++++++
.../AMDGPU/memory-legalizer-flat-workgroup.ll | 1223 ++++++++++++
.../AMDGPU/memory-legalizer-global-agent.ll | 1613 ++++++++++++++++
.../AMDGPU/memory-legalizer-global-lastuse.ll | 56 +
.../memory-legalizer-global-nontemporal.ll | 67 +
.../memory-legalizer-global-singlethread.ll | 1241 ++++++++++++
.../AMDGPU/memory-legalizer-global-system.ll | 1586 +++++++++++++++
.../memory-legalizer-global-volatile.ll | 81 +
.../memory-legalizer-global-wavefront.ll | 1241 ++++++++++++
.../memory-legalizer-global-workgroup.ll | 1271 ++++++++++++
.../AMDGPU/memory-legalizer-local-agent.ll | 1211 ++++++++++++
.../memory-legalizer-local-nontemporal.ll | 69 +
.../memory-legalizer-local-singlethread.ll | 1165 +++++++++++
.../AMDGPU/memory-legalizer-local-system.ll | 1211 ++++++++++++
.../AMDGPU/memory-legalizer-local-volatile.ll | 80 +
.../memory-legalizer-local-wavefront.ll | 1165 +++++++++++
25 files changed, 19812 insertions(+)
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-global.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-global.ll
index 80445f793934b..20822c71198b6 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-global.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-global.ll
@@ -12,6 +12,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250-CU %s
define amdgpu_kernel void @workgroup_acquire_fence() {
; GFX6-LABEL: workgroup_acquire_fence:
@@ -78,6 +79,10 @@ define amdgpu_kernel void @workgroup_acquire_fence() {
; GFX12-CU-LABEL: workgroup_acquire_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: workgroup_acquire_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("workgroup") acquire, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@@ -145,6 +150,10 @@ define amdgpu_kernel void @workgroup_release_fence() {
; GFX12-CU-LABEL: workgroup_release_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: workgroup_release_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("workgroup") release, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@@ -217,6 +226,10 @@ define amdgpu_kernel void @workgroup_acq_rel_fence() {
; GFX12-CU-LABEL: workgroup_acq_rel_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: workgroup_acq_rel_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("workgroup") acq_rel, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@@ -289,6 +302,10 @@ define amdgpu_kernel void @workgroup_seq_cst_fence() {
; GFX12-CU-LABEL: workgroup_seq_cst_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: workgroup_seq_cst_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("workgroup") seq_cst, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@@ -359,6 +376,10 @@ define amdgpu_kernel void @workgroup_one_as_acquire_fence() {
; GFX12-CU-LABEL: workgroup_one_as_acquire_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: workgroup_one_as_acquire_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("workgroup-one-as") acquire, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@@ -426,6 +447,10 @@ define amdgpu_kernel void @workgroup_one_as_release_fence() {
; GFX12-CU-LABEL: workgroup_one_as_release_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: workgroup_one_as_release_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("workgroup-one-as") release, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@@ -498,6 +523,10 @@ define amdgpu_kernel void @workgroup_one_as_acq_rel_fence() {
; GFX12-CU-LABEL: workgroup_one_as_acq_rel_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: workgroup_one_as_acq_rel_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("workgroup-one-as") acq_rel, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@@ -570,6 +599,10 @@ define amdgpu_kernel void @workgroup_one_as_seq_cst_fence() {
; GFX12-CU-LABEL: workgroup_one_as_seq_cst_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: workgroup_one_as_seq_cst_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("workgroup-one-as") seq_cst, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@@ -662,6 +695,13 @@ define amdgpu_kernel void @agent_acquire_fence() {
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: agent_acquire_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("agent") acquire, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@@ -744,6 +784,14 @@ define amdgpu_kernel void @agent_release_fence() {
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: agent_release_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("agent") release, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@@ -842,6 +890,15 @@ define amdgpu_kernel void @agent_acq_rel_fence() {
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: agent_acq_rel_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("agent") acq_rel, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@@ -940,6 +997,15 @@ define amdgpu_kernel void @agent_seq_cst_fence() {
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: agent_seq_cst_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("agent") seq_cst, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@@ -1032,6 +1098,13 @@ define amdgpu_kernel void @agent_one_as_acquire_fence() {
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: agent_one_as_acquire_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("agent-one-as") acquire, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@@ -1114,6 +1187,14 @@ define amdgpu_kernel void @agent_one_as_release_fence() {
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: agent_one_as_release_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("agent-one-as") release, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@@ -1212,6 +1293,15 @@ define amdgpu_kernel void @agent_one_as_acq_rel_fence() {
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: agent_one_as_acq_rel_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("agent-one-as") acq_rel, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@@ -1310,6 +1400,15 @@ define amdgpu_kernel void @agent_one_as_seq_cst_fence() {
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: agent_one_as_seq_cst_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("agent-one-as") seq_cst, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@@ -1404,6 +1503,13 @@ define amdgpu_kernel void @system_acquire_fence() {
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: system_acquire_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence acquire, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@@ -1490,6 +1596,15 @@ define amdgpu_kernel void @system_release_fence() {
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: system_release_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence release, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@@ -1594,6 +1709,16 @@ define amdgpu_kernel void @system_acq_rel_fence() {
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: system_acq_rel_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence acq_rel, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@@ -1698,6 +1823,16 @@ define amdgpu_kernel void @system_seq_cst_fence() {
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: system_seq_cst_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence seq_cst, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@@ -1792,6 +1927,13 @@ define amdgpu_kernel void @system_one_as_acquire_fence() {
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: system_one_as_acquire_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("one-as") acquire, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@@ -1878,6 +2020,15 @@ define amdgpu_kernel void @system_one_as_release_fence() {
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: system_one_as_release_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("one-as") release, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@@ -1982,6 +2133,16 @@ define amdgpu_kernel void @system_one_as_acq_rel_fence() {
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: system_one_as_acq_rel_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("one-as") acq_rel, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@@ -2086,6 +2247,16 @@ define amdgpu_kernel void @system_one_as_seq_cst_fence() {
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: system_one_as_seq_cst_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("one-as") seq_cst, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-local.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-local.ll
index 7a419a5031ba9..767dbc1432242 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-local.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-local.ll
@@ -12,6 +12,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250-CU %s
define amdgpu_kernel void @workgroup_acquire_fence() {
; GFX6-LABEL: workgroup_acquire_fence:
@@ -76,6 +77,11 @@ define amdgpu_kernel void @workgroup_acquire_fence() {
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: workgroup_acquire_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("workgroup") acquire, !mmra !{!"amdgpu-synchronize-as", !"local"}
ret void
@@ -142,6 +148,10 @@ define amdgpu_kernel void @workgroup_release_fence() {
; GFX12-CU-LABEL: workgroup_release_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: workgroup_release_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("workgroup") release, !mmra !{!"amdgpu-synchronize-as", !"local"}
ret void
@@ -208,6 +218,10 @@ define amdgpu_kernel void @workgroup_acq_rel_fence() {
; GFX12-CU-LABEL: workgroup_acq_rel_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: workgroup_acq_rel_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("workgroup") acq_rel, !mmra !{!"amdgpu-synchronize-as", !"local"}
ret void
@@ -274,6 +288,10 @@ define amdgpu_kernel void @workgroup_seq_cst_fence() {
; GFX12-CU-LABEL: workgroup_seq_cst_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: workgroup_seq_cst_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("workgroup") seq_cst, !mmra !{!"amdgpu-synchronize-as", !"local"}
ret void
@@ -331,6 +349,10 @@ define amdgpu_kernel void @workgroup_one_as_acquire_fence() {
; GFX12-CU-LABEL: workgroup_one_as_acquire_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: workgroup_one_as_acquire_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("workgroup-one-as") acquire, !mmra !{!"amdgpu-synchronize-as", !"local"}
ret void
@@ -388,6 +410,10 @@ define amdgpu_kernel void @workgroup_one_as_release_fence() {
; GFX12-CU-LABEL: workgroup_one_as_release_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: workgroup_one_as_release_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("workgroup-one-as") release, !mmra !{!"amdgpu-synchronize-as", !"local"}
ret void
@@ -445,6 +471,10 @@ define amdgpu_kernel void @workgroup_one_as_acq_rel_fence() {
; GFX12-CU-LABEL: workgroup_one_as_acq_rel_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: workgroup_one_as_acq_rel_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("workgroup-one-as") acq_rel, !mmra !{!"amdgpu-synchronize-as", !"local"}
ret void
@@ -502,6 +532,10 @@ define amdgpu_kernel void @workgroup_one_as_seq_cst_fence() {
; GFX12-CU-LABEL: workgroup_one_as_seq_cst_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: workgroup_one_as_seq_cst_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("workgroup-one-as") seq_cst, !mmra !{!"amdgpu-synchronize-as", !"local"}
ret void
@@ -570,6 +604,11 @@ define amdgpu_kernel void @agent_acquire_fence() {
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: agent_acquire_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("agent") acquire, !mmra !{!"amdgpu-synchronize-as", !"local"}
ret void
@@ -636,6 +675,10 @@ define amdgpu_kernel void @agent_release_fence() {
; GFX12-CU-LABEL: agent_release_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: agent_release_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("agent") release, !mmra !{!"amdgpu-synchronize-as", !"local"}
ret void
@@ -702,6 +745,10 @@ define amdgpu_kernel void @agent_acq_rel_fence() {
; GFX12-CU-LABEL: agent_acq_rel_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: agent_acq_rel_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("agent") acq_rel, !mmra !{!"amdgpu-synchronize-as", !"local"}
ret void
@@ -768,6 +815,10 @@ define amdgpu_kernel void @agent_seq_cst_fence() {
; GFX12-CU-LABEL: agent_seq_cst_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: agent_seq_cst_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("agent") seq_cst, !mmra !{!"amdgpu-synchronize-as", !"local"}
ret void
@@ -825,6 +876,10 @@ define amdgpu_kernel void @agent_one_as_acquire_fence() {
; GFX12-CU-LABEL: agent_one_as_acquire_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: agent_one_as_acquire_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("agent-one-as") acquire, !mmra !{!"amdgpu-synchronize-as", !"local"}
ret void
@@ -882,6 +937,10 @@ define amdgpu_kernel void @agent_one_as_release_fence() {
; GFX12-CU-LABEL: agent_one_as_release_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: agent_one_as_release_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("agent-one-as") release, !mmra !{!"amdgpu-synchronize-as", !"local"}
ret void
@@ -939,6 +998,10 @@ define amdgpu_kernel void @agent_one_as_acq_rel_fence() {
; GFX12-CU-LABEL: agent_one_as_acq_rel_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: agent_one_as_acq_rel_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("agent-one-as") acq_rel, !mmra !{!"amdgpu-synchronize-as", !"local"}
ret void
@@ -996,6 +1059,10 @@ define amdgpu_kernel void @agent_one_as_seq_cst_fence() {
; GFX12-CU-LABEL: agent_one_as_seq_cst_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: agent_one_as_seq_cst_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("agent-one-as") seq_cst, !mmra !{!"amdgpu-synchronize-as", !"local"}
ret void
@@ -1064,6 +1131,11 @@ define amdgpu_kernel void @system_acquire_fence() {
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: system_acquire_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence acquire, !mmra !{!"amdgpu-synchronize-as", !"local"}
ret void
@@ -1130,6 +1202,10 @@ define amdgpu_kernel void @system_release_fence() {
; GFX12-CU-LABEL: system_release_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: system_release_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence release, !mmra !{!"amdgpu-synchronize-as", !"local"}
ret void
@@ -1196,6 +1272,10 @@ define amdgpu_kernel void @system_acq_rel_fence() {
; GFX12-CU-LABEL: system_acq_rel_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: system_acq_rel_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence acq_rel, !mmra !{!"amdgpu-synchronize-as", !"local"}
ret void
@@ -1262,6 +1342,10 @@ define amdgpu_kernel void @system_seq_cst_fence() {
; GFX12-CU-LABEL: system_seq_cst_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: system_seq_cst_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence seq_cst, !mmra !{!"amdgpu-synchronize-as", !"local"}
ret void
@@ -1319,6 +1403,10 @@ define amdgpu_kernel void @system_one_as_acquire_fence() {
; GFX12-CU-LABEL: system_one_as_acquire_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: system_one_as_acquire_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("one-as") acquire, !mmra !{!"amdgpu-synchronize-as", !"local"}
ret void
@@ -1376,6 +1464,10 @@ define amdgpu_kernel void @system_one_as_release_fence() {
; GFX12-CU-LABEL: system_one_as_release_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: system_one_as_release_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("one-as") release, !mmra !{!"amdgpu-synchronize-as", !"local"}
ret void
@@ -1433,6 +1525,10 @@ define amdgpu_kernel void @system_one_as_acq_rel_fence() {
; GFX12-CU-LABEL: system_one_as_acq_rel_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: system_one_as_acq_rel_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("one-as") acq_rel, !mmra !{!"amdgpu-synchronize-as", !"local"}
ret void
@@ -1490,6 +1586,10 @@ define amdgpu_kernel void @system_one_as_seq_cst_fence() {
; GFX12-CU-LABEL: system_one_as_seq_cst_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: system_one_as_seq_cst_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("one-as") seq_cst, !mmra !{!"amdgpu-synchronize-as", !"local"}
ret void
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll
index 0e459ed0f1243..8d7194b834385 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll
@@ -12,6 +12,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250-CU %s
define amdgpu_kernel void @singlethread_acquire_fence() {
; GFX6-LABEL: singlethread_acquire_fence:
@@ -65,6 +66,10 @@ define amdgpu_kernel void @singlethread_acquire_fence() {
; GFX12-CU-LABEL: singlethread_acquire_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: singlethread_acquire_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("singlethread") acquire
ret void
@@ -122,6 +127,10 @@ define amdgpu_kernel void @singlethread_release_fence() {
; GFX12-CU-LABEL: singlethread_release_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: singlethread_release_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("singlethread") release
ret void
@@ -179,6 +188,10 @@ define amdgpu_kernel void @singlethread_acq_rel_fence() {
; GFX12-CU-LABEL: singlethread_acq_rel_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: singlethread_acq_rel_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("singlethread") acq_rel
ret void
@@ -236,6 +249,10 @@ define amdgpu_kernel void @singlethread_seq_cst_fence() {
; GFX12-CU-LABEL: singlethread_seq_cst_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: singlethread_seq_cst_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("singlethread") seq_cst
ret void
@@ -293,6 +310,10 @@ define amdgpu_kernel void @singlethread_one_as_acquire_fence() {
; GFX12-CU-LABEL: singlethread_one_as_acquire_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: singlethread_one_as_acquire_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("singlethread-one-as") acquire
ret void
@@ -350,6 +371,10 @@ define amdgpu_kernel void @singlethread_one_as_release_fence() {
; GFX12-CU-LABEL: singlethread_one_as_release_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: singlethread_one_as_release_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("singlethread-one-as") release
ret void
@@ -407,6 +432,10 @@ define amdgpu_kernel void @singlethread_one_as_acq_rel_fence() {
; GFX12-CU-LABEL: singlethread_one_as_acq_rel_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: singlethread_one_as_acq_rel_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("singlethread-one-as") acq_rel
ret void
@@ -464,6 +493,10 @@ define amdgpu_kernel void @singlethread_one_as_seq_cst_fence() {
; GFX12-CU-LABEL: singlethread_one_as_seq_cst_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: singlethread_one_as_seq_cst_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("singlethread-one-as") seq_cst
ret void
@@ -521,6 +554,10 @@ define amdgpu_kernel void @wavefront_acquire_fence() {
; GFX12-CU-LABEL: wavefront_acquire_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: wavefront_acquire_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("wavefront") acquire
ret void
@@ -578,6 +615,10 @@ define amdgpu_kernel void @wavefront_release_fence() {
; GFX12-CU-LABEL: wavefront_release_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: wavefront_release_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("wavefront") release
ret void
@@ -635,6 +676,10 @@ define amdgpu_kernel void @wavefront_acq_rel_fence() {
; GFX12-CU-LABEL: wavefront_acq_rel_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: wavefront_acq_rel_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("wavefront") acq_rel
ret void
@@ -692,6 +737,10 @@ define amdgpu_kernel void @wavefront_seq_cst_fence() {
; GFX12-CU-LABEL: wavefront_seq_cst_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: wavefront_seq_cst_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("wavefront") seq_cst
ret void
@@ -749,6 +798,10 @@ define amdgpu_kernel void @wavefront_one_as_acquire_fence() {
; GFX12-CU-LABEL: wavefront_one_as_acquire_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: wavefront_one_as_acquire_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("wavefront-one-as") acquire
ret void
@@ -806,6 +859,10 @@ define amdgpu_kernel void @wavefront_one_as_release_fence() {
; GFX12-CU-LABEL: wavefront_one_as_release_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: wavefront_one_as_release_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("wavefront-one-as") release
ret void
@@ -863,6 +920,10 @@ define amdgpu_kernel void @wavefront_one_as_acq_rel_fence() {
; GFX12-CU-LABEL: wavefront_one_as_acq_rel_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: wavefront_one_as_acq_rel_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("wavefront-one-as") acq_rel
ret void
@@ -920,6 +981,10 @@ define amdgpu_kernel void @wavefront_one_as_seq_cst_fence() {
; GFX12-CU-LABEL: wavefront_one_as_seq_cst_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: wavefront_one_as_seq_cst_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("wavefront-one-as") seq_cst
ret void
@@ -998,6 +1063,11 @@ define amdgpu_kernel void @workgroup_acquire_fence() {
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: workgroup_acquire_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("workgroup") acquire
ret void
@@ -1073,6 +1143,11 @@ define amdgpu_kernel void @workgroup_release_fence() {
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: workgroup_release_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("workgroup") release
ret void
@@ -1153,6 +1228,11 @@ define amdgpu_kernel void @workgroup_acq_rel_fence() {
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: workgroup_acq_rel_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("workgroup") acq_rel
ret void
@@ -1233,6 +1313,11 @@ define amdgpu_kernel void @workgroup_seq_cst_fence() {
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: workgroup_seq_cst_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("workgroup") seq_cst
ret void
@@ -1303,6 +1388,10 @@ define amdgpu_kernel void @workgroup_one_as_acquire_fence() {
; GFX12-CU-LABEL: workgroup_one_as_acquire_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: workgroup_one_as_acquire_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("workgroup-one-as") acquire
ret void
@@ -1370,6 +1459,10 @@ define amdgpu_kernel void @workgroup_one_as_release_fence() {
; GFX12-CU-LABEL: workgroup_one_as_release_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: workgroup_one_as_release_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("workgroup-one-as") release
ret void
@@ -1442,6 +1535,10 @@ define amdgpu_kernel void @workgroup_one_as_acq_rel_fence() {
; GFX12-CU-LABEL: workgroup_one_as_acq_rel_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: workgroup_one_as_acq_rel_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("workgroup-one-as") acq_rel
ret void
@@ -1514,6 +1611,10 @@ define amdgpu_kernel void @workgroup_one_as_seq_cst_fence() {
; GFX12-CU-LABEL: workgroup_one_as_seq_cst_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: workgroup_one_as_seq_cst_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("workgroup-one-as") seq_cst
ret void
@@ -1606,6 +1707,13 @@ define amdgpu_kernel void @agent_acquire_fence() {
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: agent_acquire_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("agent") acquire
ret void
@@ -1688,6 +1796,14 @@ define amdgpu_kernel void @agent_release_fence() {
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: agent_release_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("agent") release
ret void
@@ -1786,6 +1902,15 @@ define amdgpu_kernel void @agent_acq_rel_fence() {
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: agent_acq_rel_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("agent") acq_rel
ret void
@@ -1884,6 +2009,15 @@ define amdgpu_kernel void @agent_seq_cst_fence() {
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: agent_seq_cst_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("agent") seq_cst
ret void
@@ -1976,6 +2110,13 @@ define amdgpu_kernel void @agent_one_as_acquire_fence() {
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: agent_one_as_acquire_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("agent-one-as") acquire
ret void
@@ -2058,6 +2199,14 @@ define amdgpu_kernel void @agent_one_as_release_fence() {
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: agent_one_as_release_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("agent-one-as") release
ret void
@@ -2156,6 +2305,15 @@ define amdgpu_kernel void @agent_one_as_acq_rel_fence() {
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: agent_one_as_acq_rel_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("agent-one-as") acq_rel
ret void
@@ -2254,6 +2412,15 @@ define amdgpu_kernel void @agent_one_as_seq_cst_fence() {
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: agent_one_as_seq_cst_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("agent-one-as") seq_cst
ret void
@@ -2348,6 +2515,13 @@ define amdgpu_kernel void @system_acquire_fence() {
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: system_acquire_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence acquire
ret void
@@ -2434,6 +2608,15 @@ define amdgpu_kernel void @system_release_fence() {
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: system_release_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence release
ret void
@@ -2538,6 +2721,16 @@ define amdgpu_kernel void @system_acq_rel_fence() {
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: system_acq_rel_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence acq_rel
ret void
@@ -2642,6 +2835,16 @@ define amdgpu_kernel void @system_seq_cst_fence() {
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: system_seq_cst_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence seq_cst
ret void
@@ -2736,6 +2939,13 @@ define amdgpu_kernel void @system_one_as_acquire_fence() {
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: system_one_as_acquire_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("one-as") acquire
ret void
@@ -2822,6 +3032,15 @@ define amdgpu_kernel void @system_one_as_release_fence() {
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: system_one_as_release_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("one-as") release
ret void
@@ -2926,6 +3145,16 @@ define amdgpu_kernel void @system_one_as_acq_rel_fence() {
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: system_one_as_acq_rel_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("one-as") acq_rel
ret void
@@ -3030,6 +3259,16 @@ define amdgpu_kernel void @system_one_as_seq_cst_fence() {
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: system_one_as_seq_cst_fence:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
entry:
fence syncscope("one-as") seq_cst
ret void
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll
index 07ad8cb0c4a3d..05b599c6bc1c7 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll
@@ -11,6 +11,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250-CU %s
define amdgpu_kernel void @flat_agent_unordered_load(
; GFX7-LABEL: flat_agent_unordered_load:
@@ -187,6 +188,17 @@ define amdgpu_kernel void @flat_agent_unordered_load(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_unordered_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3]
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in syncscope("agent") unordered, align 4
@@ -369,6 +381,17 @@ define amdgpu_kernel void @flat_agent_monotonic_load(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_monotonic_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in syncscope("agent") monotonic, align 4
@@ -566,6 +589,18 @@ define amdgpu_kernel void @flat_agent_acquire_load(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_acquire_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in syncscope("agent") acquire, align 4
@@ -789,6 +824,24 @@ define amdgpu_kernel void @flat_agent_seq_cst_load(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_seq_cst_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in syncscope("agent") seq_cst, align 4
@@ -939,6 +992,16 @@ define amdgpu_kernel void @flat_agent_unordered_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_unordered_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out syncscope("agent") unordered, align 4
@@ -1088,6 +1151,16 @@ define amdgpu_kernel void @flat_agent_monotonic_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_monotonic_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out syncscope("agent") monotonic, align 4
@@ -1261,6 +1334,20 @@ define amdgpu_kernel void @flat_agent_release_store(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_release_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out syncscope("agent") release, align 4
@@ -1434,6 +1521,20 @@ define amdgpu_kernel void @flat_agent_seq_cst_store(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_seq_cst_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out syncscope("agent") seq_cst, align 4
@@ -1583,6 +1684,16 @@ define amdgpu_kernel void @flat_agent_monotonic_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_monotonic_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("agent") monotonic
@@ -1763,6 +1874,18 @@ define amdgpu_kernel void @flat_agent_acquire_atomicrmw(
; GFX12-CU-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_acquire_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_storecnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("agent") acquire
@@ -1936,6 +2059,20 @@ define amdgpu_kernel void @flat_agent_release_atomicrmw(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_release_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("agent") release
@@ -2140,6 +2277,22 @@ define amdgpu_kernel void @flat_agent_acq_rel_atomicrmw(
; GFX12-CU-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_acq_rel_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_storecnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("agent") acq_rel
@@ -2344,6 +2497,22 @@ define amdgpu_kernel void @flat_agent_seq_cst_atomicrmw(
; GFX12-CU-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_seq_cst_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_storecnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("agent") seq_cst
@@ -2552,6 +2721,19 @@ define amdgpu_kernel void @flat_agent_acquire_ret_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_acquire_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("agent") acquire
@@ -2789,6 +2971,25 @@ define amdgpu_kernel void @flat_agent_acq_rel_ret_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_acq_rel_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("agent") acq_rel
@@ -3026,6 +3227,25 @@ define amdgpu_kernel void @flat_agent_seq_cst_ret_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_seq_cst_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("agent") seq_cst
@@ -3264,6 +3484,20 @@ define amdgpu_kernel void @flat_agent_monotonic_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_monotonic_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -3533,6 +3767,22 @@ define amdgpu_kernel void @flat_agent_acquire_monotonic_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_acquire_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_storecnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -3795,6 +4045,24 @@ define amdgpu_kernel void @flat_agent_release_monotonic_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_release_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -4088,6 +4356,26 @@ define amdgpu_kernel void @flat_agent_acq_rel_monotonic_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_acq_rel_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_storecnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -4381,6 +4669,26 @@ define amdgpu_kernel void @flat_agent_seq_cst_monotonic_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_seq_cst_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_storecnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -4650,6 +4958,22 @@ define amdgpu_kernel void @flat_agent_monotonic_acquire_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_monotonic_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_storecnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -4919,6 +5243,22 @@ define amdgpu_kernel void @flat_agent_acquire_acquire_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_acquire_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_storecnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -5212,6 +5552,26 @@ define amdgpu_kernel void @flat_agent_release_acquire_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_release_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_storecnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -5505,6 +5865,26 @@ define amdgpu_kernel void @flat_agent_acq_rel_acquire_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_acq_rel_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_storecnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -5798,6 +6178,26 @@ define amdgpu_kernel void @flat_agent_seq_cst_acquire_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_seq_cst_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_storecnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -6091,6 +6491,26 @@ define amdgpu_kernel void @flat_agent_monotonic_seq_cst_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_monotonic_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_storecnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -6384,6 +6804,26 @@ define amdgpu_kernel void @flat_agent_acquire_seq_cst_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_acquire_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_storecnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -6677,6 +7117,26 @@ define amdgpu_kernel void @flat_agent_release_seq_cst_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_release_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_storecnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -6970,6 +7430,26 @@ define amdgpu_kernel void @flat_agent_acq_rel_seq_cst_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_acq_rel_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_storecnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -7263,6 +7743,26 @@ define amdgpu_kernel void @flat_agent_seq_cst_seq_cst_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_seq_cst_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_storecnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -7545,6 +8045,22 @@ define amdgpu_kernel void @flat_agent_monotonic_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_monotonic_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -7844,6 +8360,23 @@ define amdgpu_kernel void @flat_agent_acquire_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_acquire_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -8152,6 +8685,26 @@ define amdgpu_kernel void @flat_agent_release_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_release_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -8479,6 +9032,29 @@ define amdgpu_kernel void @flat_agent_acq_rel_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_acq_rel_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -8806,6 +9382,29 @@ define amdgpu_kernel void @flat_agent_seq_cst_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_seq_cst_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -9109,6 +9708,25 @@ define amdgpu_kernel void @flat_agent_monotonic_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_monotonic_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -9408,6 +10026,23 @@ define amdgpu_kernel void @flat_agent_acquire_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_acquire_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -9735,6 +10370,29 @@ define amdgpu_kernel void @flat_agent_release_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_release_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -10062,6 +10720,29 @@ define amdgpu_kernel void @flat_agent_acq_rel_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_acq_rel_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -10389,6 +11070,29 @@ define amdgpu_kernel void @flat_agent_seq_cst_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_seq_cst_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -10716,6 +11420,29 @@ define amdgpu_kernel void @flat_agent_monotonic_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_monotonic_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -11039,6 +11766,27 @@ define amdgpu_kernel void @flat_agent_acquire_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_acquire_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -11366,6 +12114,29 @@ define amdgpu_kernel void @flat_agent_release_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_release_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -11693,6 +12464,29 @@ define amdgpu_kernel void @flat_agent_acq_rel_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_acq_rel_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -12020,6 +12814,29 @@ define amdgpu_kernel void @flat_agent_seq_cst_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_seq_cst_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -12204,6 +13021,17 @@ define amdgpu_kernel void @flat_agent_one_as_unordered_load(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_one_as_unordered_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3]
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in syncscope("agent-one-as") unordered, align 4
@@ -12386,6 +13214,17 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_load(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_one_as_monotonic_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in syncscope("agent-one-as") monotonic, align 4
@@ -12593,6 +13432,19 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_load(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_one_as_acquire_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in syncscope("agent-one-as") acquire, align 4
@@ -12826,6 +13678,25 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_load(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_one_as_seq_cst_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in syncscope("agent-one-as") seq_cst, align 4
@@ -12976,6 +13847,16 @@ define amdgpu_kernel void @flat_agent_one_as_unordered_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_one_as_unordered_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out syncscope("agent-one-as") unordered, align 4
@@ -13125,6 +14006,16 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_one_as_monotonic_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out syncscope("agent-one-as") monotonic, align 4
@@ -13298,6 +14189,20 @@ define amdgpu_kernel void @flat_agent_one_as_release_store(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_one_as_release_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out syncscope("agent-one-as") release, align 4
@@ -13471,6 +14376,20 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_store(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_one_as_seq_cst_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out syncscope("agent-one-as") seq_cst, align 4
@@ -13620,6 +14539,16 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_one_as_monotonic_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("agent-one-as") monotonic
@@ -13796,6 +14725,18 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_atomicrmw(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_one_as_acquire_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("agent-one-as") acquire
@@ -13969,6 +14910,20 @@ define amdgpu_kernel void @flat_agent_one_as_release_atomicrmw(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_one_as_release_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("agent-one-as") release
@@ -14169,6 +15124,22 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_atomicrmw(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_one_as_acq_rel_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("agent-one-as") acq_rel
@@ -14369,6 +15340,22 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_atomicrmw(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_one_as_seq_cst_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("agent-one-as") seq_cst
@@ -14587,6 +15574,20 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_ret_atomicrmw(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_one_as_acquire_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("agent-one-as") acquire
@@ -14834,6 +15835,26 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_ret_atomicrmw(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_one_as_acq_rel_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("agent-one-as") acq_rel
@@ -15081,6 +16102,26 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_ret_atomicrmw(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_one_as_seq_cst_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("agent-one-as") seq_cst
@@ -15319,6 +16360,20 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_one_as_monotonic_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -15584,6 +16639,22 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_monotonic_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_one_as_acquire_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -15846,6 +16917,24 @@ define amdgpu_kernel void @flat_agent_one_as_release_monotonic_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_one_as_release_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -16135,6 +17224,26 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_monotonic_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_one_as_acq_rel_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -16424,6 +17533,26 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_monotonic_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_one_as_seq_cst_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -16689,6 +17818,22 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_acquire_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_one_as_monotonic_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -16954,6 +18099,22 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_acquire_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_one_as_acquire_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -17243,6 +18404,26 @@ define amdgpu_kernel void @flat_agent_one_as_release_acquire_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_one_as_release_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -17532,6 +18713,26 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_acquire_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_one_as_acq_rel_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -17821,6 +19022,26 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_acquire_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_one_as_seq_cst_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -18110,6 +19331,26 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_seq_cst_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_one_as_monotonic_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -18399,6 +19640,26 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_seq_cst_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_one_as_acquire_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -18688,6 +19949,26 @@ define amdgpu_kernel void @flat_agent_one_as_release_seq_cst_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_one_as_release_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -18977,6 +20258,26 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_seq_cst_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_one_as_acq_rel_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -19266,6 +20567,26 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_seq_cst_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_one_as_seq_cst_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -19548,6 +20869,22 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_one_as_monotonic_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -19857,6 +21194,24 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_one_as_acquire_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -20165,6 +21520,26 @@ define amdgpu_kernel void @flat_agent_one_as_release_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_one_as_release_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -20502,6 +21877,30 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_one_as_acq_rel_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -20839,6 +22238,30 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_one_as_seq_cst_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -21152,6 +22575,26 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_one_as_monotonic_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -21461,6 +22904,24 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_one_as_acquire_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -21798,6 +23259,30 @@ define amdgpu_kernel void @flat_agent_one_as_release_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_one_as_release_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -22135,6 +23620,30 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_one_as_acq_rel_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -22472,6 +23981,30 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_one_as_seq_cst_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -22809,6 +24342,30 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_one_as_monotonic_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -23142,6 +24699,28 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_one_as_acquire_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -23479,6 +25058,30 @@ define amdgpu_kernel void @flat_agent_one_as_release_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_one_as_release_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -23816,6 +25419,30 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_one_as_acq_rel_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -24153,6 +25780,30 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_agent_one_as_seq_cst_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-lastuse.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-lastuse.ll
index a00af8e5b6582..0b5e6e9da7418 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-lastuse.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-lastuse.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefix=GFX12 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefix=GFX12 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250-CU %s
define amdgpu_kernel void @flat_last_use_load_0(ptr %in, ptr %out) {
; GFX12-LABEL: flat_last_use_load_0:
@@ -16,6 +17,17 @@ define amdgpu_kernel void @flat_last_use_load_0(ptr %in, ptr %out) {
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: flat_store_b32 v[0:1], v2
; GFX12-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_last_use_load_0:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3] th:TH_LOAD_LU
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
entry:
%val = load i32, ptr %in, align 4, !amdgpu.last.use !{}
store i32 %val, ptr %out
@@ -55,6 +67,21 @@ define amdgpu_kernel void @flat_last_use_load_1(ptr %in, ptr %out) {
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: flat_store_b32 v[0:1], v2
; GFX12-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_last_use_load_1:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, v0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_xcnt 0x0
+; GFX1250-CU-NEXT: s_mov_b32 s4, 0x3ff
+; GFX1250-CU-NEXT: v_and_b32_e64 v1, v1, s4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: flat_load_b32 v1, v1, s[2:3] scale_offset th:TH_LOAD_LU
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%val.gep = getelementptr inbounds i32, ptr %in, i32 %tid
@@ -80,6 +107,19 @@ define amdgpu_kernel void @flat_last_use_and_volatile_load(ptr %in, ptr %out) {
; GFX12-NEXT: s_wait_dscnt 0x0
; GFX12-NEXT: flat_store_b32 v[0:1], v2
; GFX12-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_last_use_and_volatile_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3] th:TH_LOAD_BYPASS scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
entry:
%val = load volatile i32, ptr %in, align 4, !amdgpu.last.use !{}
store i32 %val, ptr %out
@@ -100,6 +140,17 @@ define amdgpu_kernel void @flat_last_use_and_nontemporal_load(ptr %in, ptr %out)
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: flat_store_b32 v[0:1], v2
; GFX12-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_last_use_and_nontemporal_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3] th:TH_LOAD_LU
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
entry:
%val = load i32, ptr %in, align 4, !amdgpu.last.use !{}, !nontemporal !0
store i32 %val, ptr %out
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-nontemporal.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-nontemporal.ll
index 3c24c36ec547d..58f33dfed87d6 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-nontemporal.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-nontemporal.ll
@@ -11,6 +11,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250-CU %s
define amdgpu_kernel void @flat_nontemporal_load_0(
; GFX7-LABEL: flat_nontemporal_load_0:
@@ -187,6 +188,17 @@ define amdgpu_kernel void @flat_nontemporal_load_0(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_nontemporal_load_0:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3] th:TH_LOAD_NT
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load i32, ptr %in, align 4, !nontemporal !0
@@ -555,6 +567,21 @@ define amdgpu_kernel void @flat_nontemporal_load_1(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_nontemporal_load_1:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, v0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_xcnt 0x0
+; GFX1250-CU-NEXT: s_mov_b32 s4, 0x3ff
+; GFX1250-CU-NEXT: v_and_b32_e64 v1, v1, s4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: flat_load_b32 v1, v1, s[2:3] scale_offset th:TH_LOAD_NT
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -739,6 +766,17 @@ define amdgpu_kernel void @flat_nontemporal_store_0(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 th:TH_STORE_NT
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_nontemporal_store_0:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3]
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] th:TH_STORE_NT scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load i32, ptr %in, align 4
@@ -1095,6 +1133,20 @@ define amdgpu_kernel void @flat_nontemporal_store_1(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 th:TH_STORE_NT
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_nontemporal_store_1:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: flat_load_b32 v1, v1, s[2:3]
+; GFX1250-CU-NEXT: s_wait_xcnt 0x0
+; GFX1250-CU-NEXT: s_mov_b32 s2, 0x3ff
+; GFX1250-CU-NEXT: v_and_b32_e64 v0, v0, s2
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scale_offset th:TH_STORE_NT scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -1293,6 +1345,19 @@ define amdgpu_kernel void @flat_nontemporal_volatile_load(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_nontemporal_volatile_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3] th:TH_LOAD_NT scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load volatile i32, ptr %in, align 4, !nontemporal !0
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-singlethread.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-singlethread.ll
index b88a10ab24a98..ed90e278d1e86 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-singlethread.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-singlethread.ll
@@ -11,6 +11,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250-CU %s
define amdgpu_kernel void @flat_singlethread_unordered_load(
; GFX7-LABEL: flat_singlethread_unordered_load:
@@ -187,6 +188,17 @@ define amdgpu_kernel void @flat_singlethread_unordered_load(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_unordered_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3]
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in syncscope("singlethread") unordered, align 4
@@ -369,6 +381,17 @@ define amdgpu_kernel void @flat_singlethread_monotonic_load(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_monotonic_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3]
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in syncscope("singlethread") monotonic, align 4
@@ -551,6 +574,17 @@ define amdgpu_kernel void @flat_singlethread_acquire_load(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_acquire_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3]
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in syncscope("singlethread") acquire, align 4
@@ -733,6 +767,17 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_load(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_seq_cst_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3]
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in syncscope("singlethread") seq_cst, align 4
@@ -883,6 +928,16 @@ define amdgpu_kernel void @flat_singlethread_unordered_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_unordered_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out syncscope("singlethread") unordered, align 4
@@ -1032,6 +1087,16 @@ define amdgpu_kernel void @flat_singlethread_monotonic_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_monotonic_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out syncscope("singlethread") monotonic, align 4
@@ -1181,6 +1246,16 @@ define amdgpu_kernel void @flat_singlethread_release_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_release_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out syncscope("singlethread") release, align 4
@@ -1330,6 +1405,16 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_seq_cst_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out syncscope("singlethread") seq_cst, align 4
@@ -1479,6 +1564,16 @@ define amdgpu_kernel void @flat_singlethread_monotonic_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_monotonic_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("singlethread") monotonic
@@ -1628,6 +1723,16 @@ define amdgpu_kernel void @flat_singlethread_acquire_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_acquire_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("singlethread") acquire
@@ -1777,6 +1882,16 @@ define amdgpu_kernel void @flat_singlethread_release_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_release_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("singlethread") release
@@ -1926,6 +2041,16 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_acq_rel_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("singlethread") acq_rel
@@ -2075,6 +2200,16 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_seq_cst_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("singlethread") seq_cst
@@ -2268,6 +2403,18 @@ define amdgpu_kernel void @flat_singlethread_acquire_ret_atomicrmw(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_acquire_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("singlethread") acquire
@@ -2462,6 +2609,18 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_ret_atomicrmw(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_acq_rel_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("singlethread") acq_rel
@@ -2656,6 +2815,18 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_ret_atomicrmw(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_seq_cst_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("singlethread") seq_cst
@@ -2894,6 +3065,20 @@ define amdgpu_kernel void @flat_singlethread_monotonic_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_monotonic_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -3132,6 +3317,20 @@ define amdgpu_kernel void @flat_singlethread_acquire_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_acquire_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -3370,6 +3569,20 @@ define amdgpu_kernel void @flat_singlethread_release_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_release_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -3608,6 +3821,20 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_acq_rel_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -3846,6 +4073,20 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_seq_cst_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -4084,6 +4325,20 @@ define amdgpu_kernel void @flat_singlethread_monotonic_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_monotonic_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -4322,6 +4577,20 @@ define amdgpu_kernel void @flat_singlethread_acquire_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_acquire_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -4560,6 +4829,20 @@ define amdgpu_kernel void @flat_singlethread_release_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_release_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -4798,6 +5081,20 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_acq_rel_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -5036,6 +5333,20 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_seq_cst_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -5274,6 +5585,20 @@ define amdgpu_kernel void @flat_singlethread_monotonic_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_monotonic_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -5512,6 +5837,20 @@ define amdgpu_kernel void @flat_singlethread_acquire_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_acquire_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -5750,6 +6089,20 @@ define amdgpu_kernel void @flat_singlethread_release_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_release_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -5988,6 +6341,20 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_acq_rel_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -6226,6 +6593,20 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_seq_cst_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -6508,6 +6889,22 @@ define amdgpu_kernel void @flat_singlethread_monotonic_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_monotonic_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -6792,6 +7189,22 @@ define amdgpu_kernel void @flat_singlethread_acquire_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_acquire_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -7076,6 +7489,22 @@ define amdgpu_kernel void @flat_singlethread_release_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_release_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -7360,6 +7789,22 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_acq_rel_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -7644,6 +8089,22 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_seq_cst_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -7928,6 +8389,22 @@ define amdgpu_kernel void @flat_singlethread_monotonic_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_monotonic_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -8212,6 +8689,22 @@ define amdgpu_kernel void @flat_singlethread_acquire_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_acquire_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -8496,6 +8989,22 @@ define amdgpu_kernel void @flat_singlethread_release_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_release_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -8780,6 +9289,22 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_acq_rel_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -9064,6 +9589,22 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_seq_cst_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -9348,6 +9889,22 @@ define amdgpu_kernel void @flat_singlethread_monotonic_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_monotonic_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -9632,6 +10189,22 @@ define amdgpu_kernel void @flat_singlethread_acquire_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_acquire_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -9916,6 +10489,22 @@ define amdgpu_kernel void @flat_singlethread_release_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_release_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -10200,6 +10789,22 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_acq_rel_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -10484,6 +11089,22 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_seq_cst_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -10668,6 +11289,17 @@ define amdgpu_kernel void @flat_singlethread_one_as_unordered_load(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_one_as_unordered_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3]
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in syncscope("singlethread-one-as") unordered, align 4
@@ -10850,6 +11482,17 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_load(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_one_as_monotonic_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3]
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in syncscope("singlethread-one-as") monotonic, align 4
@@ -11032,6 +11675,17 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_load(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_one_as_acquire_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3]
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in syncscope("singlethread-one-as") acquire, align 4
@@ -11214,6 +11868,17 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_load(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_one_as_seq_cst_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3]
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in syncscope("singlethread-one-as") seq_cst, align 4
@@ -11364,6 +12029,16 @@ define amdgpu_kernel void @flat_singlethread_one_as_unordered_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_one_as_unordered_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out syncscope("singlethread-one-as") unordered, align 4
@@ -11513,6 +12188,16 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_one_as_monotonic_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out syncscope("singlethread-one-as") monotonic, align 4
@@ -11662,6 +12347,16 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_one_as_release_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out syncscope("singlethread-one-as") release, align 4
@@ -11811,6 +12506,16 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_one_as_seq_cst_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out syncscope("singlethread-one-as") seq_cst, align 4
@@ -11960,6 +12665,16 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_one_as_monotonic_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("singlethread-one-as") monotonic
@@ -12109,6 +12824,16 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_one_as_acquire_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("singlethread-one-as") acquire
@@ -12258,6 +12983,16 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_one_as_release_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("singlethread-one-as") release
@@ -12407,6 +13142,16 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_one_as_acq_rel_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("singlethread-one-as") acq_rel
@@ -12556,6 +13301,16 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_one_as_seq_cst_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("singlethread-one-as") seq_cst
@@ -12749,6 +13504,18 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_ret_atomicrmw(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_one_as_acquire_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("singlethread-one-as") acquire
@@ -12943,6 +13710,18 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_ret_atomicrmw(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_one_as_acq_rel_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("singlethread-one-as") acq_rel
@@ -13137,6 +13916,18 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_ret_atomicrmw(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_one_as_seq_cst_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("singlethread-one-as") seq_cst
@@ -13375,6 +14166,20 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_one_as_monotonic_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -13613,6 +14418,20 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_one_as_acquire_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -13851,6 +14670,20 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_one_as_release_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -14089,6 +14922,20 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_one_as_acq_rel_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -14327,6 +15174,20 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_one_as_seq_cst_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -14565,6 +15426,20 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_one_as_monotonic_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -14803,6 +15678,20 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_one_as_acquire_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -15041,6 +15930,20 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_one_as_release_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -15279,6 +16182,20 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_one_as_acq_rel_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -15517,6 +16434,20 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_one_as_seq_cst_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -15755,6 +16686,20 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_one_as_monotonic_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -15993,6 +16938,20 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_one_as_acquire_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -16231,6 +17190,20 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_one_as_release_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -16469,6 +17442,20 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_one_as_acq_rel_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -16707,6 +17694,20 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_one_as_seq_cst_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -16989,6 +17990,22 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_monotonic_ret_cmpx
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_one_as_monotonic_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -17273,6 +18290,22 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_monotonic_ret_cmpxch
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_one_as_acquire_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -17557,6 +18590,22 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_monotonic_ret_cmpxch
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_one_as_release_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -17841,6 +18890,22 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_monotonic_ret_cmpxch
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_one_as_acq_rel_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -18125,6 +19190,22 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_monotonic_ret_cmpxch
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_one_as_seq_cst_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -18409,6 +19490,22 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_acquire_ret_cmpxch
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_one_as_monotonic_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -18693,6 +19790,22 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_one_as_acquire_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -18977,6 +20090,22 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_one_as_release_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -19261,6 +20390,22 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_one_as_acq_rel_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -19545,6 +20690,22 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_one_as_seq_cst_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -19829,6 +20990,22 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_seq_cst_ret_cmpxch
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_one_as_monotonic_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -20113,6 +21290,22 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_one_as_acquire_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -20397,6 +21590,22 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_one_as_release_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -20681,6 +21890,22 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_one_as_acq_rel_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -20965,6 +22190,22 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_singlethread_one_as_seq_cst_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll
index 919fc3e8f4e4f..3c6ffdd1a6332 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll
@@ -11,6 +11,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250-CU %s
define amdgpu_kernel void @flat_system_unordered_load(
; GFX7-LABEL: flat_system_unordered_load:
@@ -187,6 +188,17 @@ define amdgpu_kernel void @flat_system_unordered_load(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_unordered_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3]
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in unordered, align 4
@@ -369,6 +381,17 @@ define amdgpu_kernel void @flat_system_monotonic_load(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_monotonic_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in monotonic, align 4
@@ -568,6 +591,18 @@ define amdgpu_kernel void @flat_system_acquire_load(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_acquire_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in acquire, align 4
@@ -793,6 +828,24 @@ define amdgpu_kernel void @flat_system_seq_cst_load(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_seq_cst_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in seq_cst, align 4
@@ -943,6 +996,16 @@ define amdgpu_kernel void @flat_system_unordered_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_unordered_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out unordered, align 4
@@ -1092,6 +1155,16 @@ define amdgpu_kernel void @flat_system_monotonic_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_monotonic_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out monotonic, align 4
@@ -1269,6 +1342,21 @@ define amdgpu_kernel void @flat_system_release_store(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_release_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out release, align 4
@@ -1446,6 +1534,21 @@ define amdgpu_kernel void @flat_system_seq_cst_store(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_seq_cst_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out seq_cst, align 4
@@ -1595,6 +1698,16 @@ define amdgpu_kernel void @flat_system_monotonic_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_monotonic_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in monotonic
@@ -1777,6 +1890,18 @@ define amdgpu_kernel void @flat_system_acquire_atomicrmw(
; GFX12-CU-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_acquire_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_storecnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in acquire
@@ -1954,6 +2079,21 @@ define amdgpu_kernel void @flat_system_release_atomicrmw(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_release_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in release
@@ -2164,6 +2304,23 @@ define amdgpu_kernel void @flat_system_acq_rel_atomicrmw(
; GFX12-CU-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_acq_rel_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_storecnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in acq_rel
@@ -2374,6 +2531,23 @@ define amdgpu_kernel void @flat_system_seq_cst_atomicrmw(
; GFX12-CU-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_seq_cst_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_storecnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in seq_cst
@@ -2584,6 +2758,19 @@ define amdgpu_kernel void @flat_system_acquire_ret_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_acquire_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in acquire
@@ -2827,6 +3014,26 @@ define amdgpu_kernel void @flat_system_acq_rel_ret_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_acq_rel_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in acq_rel
@@ -3070,6 +3277,26 @@ define amdgpu_kernel void @flat_system_seq_cst_ret_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_seq_cst_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in seq_cst
@@ -3308,6 +3535,20 @@ define amdgpu_kernel void @flat_system_monotonic_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_monotonic_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -3579,6 +3820,22 @@ define amdgpu_kernel void @flat_system_acquire_monotonic_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_acquire_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_storecnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -3845,6 +4102,25 @@ define amdgpu_kernel void @flat_system_release_monotonic_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_release_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -4144,6 +4420,27 @@ define amdgpu_kernel void @flat_system_acq_rel_monotonic_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_acq_rel_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_storecnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -4443,6 +4740,27 @@ define amdgpu_kernel void @flat_system_seq_cst_monotonic_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_seq_cst_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_storecnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -4714,6 +5032,22 @@ define amdgpu_kernel void @flat_system_monotonic_acquire_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_monotonic_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_storecnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -4985,6 +5319,22 @@ define amdgpu_kernel void @flat_system_acquire_acquire_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_acquire_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_storecnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -5284,6 +5634,27 @@ define amdgpu_kernel void @flat_system_release_acquire_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_release_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_storecnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -5583,6 +5954,27 @@ define amdgpu_kernel void @flat_system_acq_rel_acquire_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_acq_rel_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_storecnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -5882,6 +6274,27 @@ define amdgpu_kernel void @flat_system_seq_cst_acquire_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_seq_cst_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_storecnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -6181,6 +6594,27 @@ define amdgpu_kernel void @flat_system_monotonic_seq_cst_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_monotonic_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_storecnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -6480,6 +6914,27 @@ define amdgpu_kernel void @flat_system_acquire_seq_cst_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_acquire_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_storecnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -6779,6 +7234,27 @@ define amdgpu_kernel void @flat_system_release_seq_cst_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_release_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_storecnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -7078,6 +7554,27 @@ define amdgpu_kernel void @flat_system_acq_rel_seq_cst_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_acq_rel_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_storecnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -7377,6 +7874,27 @@ define amdgpu_kernel void @flat_system_seq_cst_seq_cst_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt_dscnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_seq_cst_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_storecnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -7659,6 +8177,22 @@ define amdgpu_kernel void @flat_system_monotonic_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_monotonic_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -7960,6 +8494,23 @@ define amdgpu_kernel void @flat_system_acquire_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_acquire_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -8272,6 +8823,27 @@ define amdgpu_kernel void @flat_system_release_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_release_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -8605,6 +9177,30 @@ define amdgpu_kernel void @flat_system_acq_rel_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_acq_rel_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -8938,6 +9534,30 @@ define amdgpu_kernel void @flat_system_seq_cst_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_seq_cst_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -9243,6 +9863,25 @@ define amdgpu_kernel void @flat_system_monotonic_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_monotonic_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -9544,6 +10183,23 @@ define amdgpu_kernel void @flat_system_acquire_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_acquire_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -9877,6 +10533,30 @@ define amdgpu_kernel void @flat_system_release_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_release_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -10210,6 +10890,30 @@ define amdgpu_kernel void @flat_system_acq_rel_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_acq_rel_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -10543,6 +11247,30 @@ define amdgpu_kernel void @flat_system_seq_cst_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_seq_cst_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -10876,6 +11604,30 @@ define amdgpu_kernel void @flat_system_monotonic_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_monotonic_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -11205,6 +11957,28 @@ define amdgpu_kernel void @flat_system_acquire_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_acquire_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -11538,6 +12312,30 @@ define amdgpu_kernel void @flat_system_release_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_release_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -11871,6 +12669,30 @@ define amdgpu_kernel void @flat_system_acq_rel_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_acq_rel_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -12204,6 +13026,30 @@ define amdgpu_kernel void @flat_system_seq_cst_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_seq_cst_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -12388,6 +13234,17 @@ define amdgpu_kernel void @flat_system_one_as_unordered_load(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_one_as_unordered_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3]
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in syncscope("one-as") unordered, align 4
@@ -12570,6 +13427,17 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_load(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_one_as_monotonic_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in syncscope("one-as") monotonic, align 4
@@ -12779,6 +13647,19 @@ define amdgpu_kernel void @flat_system_one_as_acquire_load(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_one_as_acquire_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in syncscope("one-as") acquire, align 4
@@ -13014,6 +13895,25 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_load(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_one_as_seq_cst_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in syncscope("one-as") seq_cst, align 4
@@ -13164,6 +14064,16 @@ define amdgpu_kernel void @flat_system_one_as_unordered_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_one_as_unordered_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out syncscope("one-as") unordered, align 4
@@ -13313,6 +14223,16 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_one_as_monotonic_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out syncscope("one-as") monotonic, align 4
@@ -13490,6 +14410,21 @@ define amdgpu_kernel void @flat_system_one_as_release_store(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_one_as_release_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out syncscope("one-as") release, align 4
@@ -13667,6 +14602,21 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_store(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_one_as_seq_cst_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out syncscope("one-as") seq_cst, align 4
@@ -13816,6 +14766,16 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_one_as_monotonic_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("one-as") monotonic
@@ -13994,6 +14954,18 @@ define amdgpu_kernel void @flat_system_one_as_acquire_atomicrmw(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_one_as_acquire_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("one-as") acquire
@@ -14171,6 +15143,21 @@ define amdgpu_kernel void @flat_system_one_as_release_atomicrmw(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_one_as_release_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("one-as") release
@@ -14377,6 +15364,23 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_atomicrmw(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_one_as_acq_rel_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("one-as") acq_rel
@@ -14583,6 +15587,23 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_atomicrmw(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_one_as_seq_cst_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("one-as") seq_cst
@@ -14803,6 +15824,20 @@ define amdgpu_kernel void @flat_system_one_as_acquire_ret_atomicrmw(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_one_as_acquire_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("one-as") acquire
@@ -15056,6 +16091,27 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_ret_atomicrmw(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_one_as_acq_rel_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("one-as") acq_rel
@@ -15309,6 +16365,27 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_ret_atomicrmw(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_one_as_seq_cst_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("one-as") seq_cst
@@ -15547,6 +16624,20 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_one_as_monotonic_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -15814,6 +16905,22 @@ define amdgpu_kernel void @flat_system_one_as_acquire_monotonic_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_one_as_acquire_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -16080,6 +17187,25 @@ define amdgpu_kernel void @flat_system_one_as_release_monotonic_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_one_as_release_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -16375,6 +17501,27 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_monotonic_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_one_as_acq_rel_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -16670,6 +17817,27 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_monotonic_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_one_as_seq_cst_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -16937,6 +18105,22 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_acquire_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_one_as_monotonic_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -17204,6 +18388,22 @@ define amdgpu_kernel void @flat_system_one_as_acquire_acquire_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_one_as_acquire_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -17499,6 +18699,27 @@ define amdgpu_kernel void @flat_system_one_as_release_acquire_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_one_as_release_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -17794,6 +19015,27 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_acquire_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_one_as_acq_rel_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -18089,6 +19331,27 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_acquire_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_one_as_seq_cst_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -18384,6 +19647,27 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_seq_cst_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_one_as_monotonic_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -18679,6 +19963,27 @@ define amdgpu_kernel void @flat_system_one_as_acquire_seq_cst_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_one_as_acquire_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -18974,6 +20279,27 @@ define amdgpu_kernel void @flat_system_one_as_release_seq_cst_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_one_as_release_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -19269,6 +20595,27 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_seq_cst_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_one_as_acq_rel_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -19564,6 +20911,27 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_seq_cst_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_one_as_seq_cst_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -19846,6 +21214,22 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_one_as_monotonic_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -20157,6 +21541,24 @@ define amdgpu_kernel void @flat_system_one_as_acquire_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_one_as_acquire_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -20469,6 +21871,27 @@ define amdgpu_kernel void @flat_system_one_as_release_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_one_as_release_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -20812,6 +22235,31 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_one_as_acq_rel_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -21155,6 +22603,31 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_one_as_seq_cst_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -21470,6 +22943,26 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_one_as_monotonic_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -21781,6 +23274,24 @@ define amdgpu_kernel void @flat_system_one_as_acquire_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_one_as_acquire_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -22124,6 +23635,31 @@ define amdgpu_kernel void @flat_system_one_as_release_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_one_as_release_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -22467,6 +24003,31 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_one_as_acq_rel_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -22810,6 +24371,31 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_one_as_seq_cst_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -23153,6 +24739,31 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_one_as_monotonic_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -23492,6 +25103,29 @@ define amdgpu_kernel void @flat_system_one_as_acquire_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_one_as_acquire_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -23835,6 +25469,31 @@ define amdgpu_kernel void @flat_system_one_as_release_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_one_as_release_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -24178,6 +25837,31 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_one_as_acq_rel_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -24521,6 +26205,31 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_system_one_as_seq_cst_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-volatile.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-volatile.ll
index a88e0e217fdb4..e0cf88891e421 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-volatile.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-volatile.ll
@@ -7,6 +7,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250-CU %s
define amdgpu_kernel void @flat_nontemporal_load_0(
; GFX7-LABEL: flat_nontemporal_load_0:
@@ -143,6 +144,19 @@ define amdgpu_kernel void @flat_nontemporal_load_0(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_nontemporal_load_0:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load volatile i32, ptr %in, align 4
@@ -415,6 +429,23 @@ define amdgpu_kernel void @flat_nontemporal_load_1(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_nontemporal_load_1:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, v0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_xcnt 0x0
+; GFX1250-CU-NEXT: s_mov_b32 s4, 0x3ff
+; GFX1250-CU-NEXT: v_and_b32_e64 v1, v1, s4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: flat_load_b32 v1, v1, s[2:3] scale_offset scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -563,6 +594,18 @@ define amdgpu_kernel void @flat_nontemporal_store_0(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SYS
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_nontemporal_store_0:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3]
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load i32, ptr %in, align 4
@@ -831,6 +874,21 @@ define amdgpu_kernel void @flat_nontemporal_store_1(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SYS
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_nontemporal_store_1:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: flat_load_b32 v1, v1, s[2:3]
+; GFX1250-CU-NEXT: s_wait_xcnt 0x0
+; GFX1250-CU-NEXT: s_mov_b32 s2, 0x3ff
+; GFX1250-CU-NEXT: v_and_b32_e64 v0, v0, s2
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scale_offset scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -971,6 +1029,17 @@ define amdgpu_kernel void @flat_volatile_workgroup_acquire_load(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_volatile_workgroup_acquire_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3]
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic volatile i32, ptr %in syncscope("workgroup") acquire, align 4
@@ -1090,6 +1159,17 @@ define amdgpu_kernel void @flat_volatile_workgroup_release_store(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_volatile_workgroup_release_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic volatile i32 %in, ptr %out syncscope("workgroup") release, align 4
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-wavefront.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-wavefront.ll
index 7c637a20ab47b..16135f5fc5d6f 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-wavefront.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-wavefront.ll
@@ -11,6 +11,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250-CU %s
define amdgpu_kernel void @flat_wavefront_unordered_load(
; GFX7-LABEL: flat_wavefront_unordered_load:
@@ -187,6 +188,17 @@ define amdgpu_kernel void @flat_wavefront_unordered_load(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_unordered_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3]
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in syncscope("wavefront") unordered, align 4
@@ -369,6 +381,17 @@ define amdgpu_kernel void @flat_wavefront_monotonic_load(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_monotonic_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3]
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in syncscope("wavefront") monotonic, align 4
@@ -551,6 +574,17 @@ define amdgpu_kernel void @flat_wavefront_acquire_load(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_acquire_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3]
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in syncscope("wavefront") acquire, align 4
@@ -733,6 +767,17 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_load(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_seq_cst_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3]
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in syncscope("wavefront") seq_cst, align 4
@@ -883,6 +928,16 @@ define amdgpu_kernel void @flat_wavefront_unordered_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_unordered_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out syncscope("wavefront") unordered, align 4
@@ -1032,6 +1087,16 @@ define amdgpu_kernel void @flat_wavefront_monotonic_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_monotonic_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out syncscope("wavefront") monotonic, align 4
@@ -1181,6 +1246,16 @@ define amdgpu_kernel void @flat_wavefront_release_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_release_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out syncscope("wavefront") release, align 4
@@ -1330,6 +1405,16 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_seq_cst_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out syncscope("wavefront") seq_cst, align 4
@@ -1479,6 +1564,16 @@ define amdgpu_kernel void @flat_wavefront_monotonic_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_monotonic_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("wavefront") monotonic
@@ -1628,6 +1723,16 @@ define amdgpu_kernel void @flat_wavefront_acquire_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_acquire_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("wavefront") acquire
@@ -1777,6 +1882,16 @@ define amdgpu_kernel void @flat_wavefront_release_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_release_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("wavefront") release
@@ -1926,6 +2041,16 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_acq_rel_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("wavefront") acq_rel
@@ -2075,6 +2200,16 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_seq_cst_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("wavefront") seq_cst
@@ -2268,6 +2403,18 @@ define amdgpu_kernel void @flat_wavefront_acquire_ret_atomicrmw(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_acquire_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("wavefront") acquire
@@ -2462,6 +2609,18 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_ret_atomicrmw(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_acq_rel_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("wavefront") acq_rel
@@ -2656,6 +2815,18 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_ret_atomicrmw(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_seq_cst_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("wavefront") seq_cst
@@ -2894,6 +3065,20 @@ define amdgpu_kernel void @flat_wavefront_monotonic_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_monotonic_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -3132,6 +3317,20 @@ define amdgpu_kernel void @flat_wavefront_acquire_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_acquire_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -3370,6 +3569,20 @@ define amdgpu_kernel void @flat_wavefront_release_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_release_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -3608,6 +3821,20 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_acq_rel_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -3846,6 +4073,20 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_seq_cst_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -4084,6 +4325,20 @@ define amdgpu_kernel void @flat_wavefront_monotonic_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_monotonic_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -4322,6 +4577,20 @@ define amdgpu_kernel void @flat_wavefront_acquire_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_acquire_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -4560,6 +4829,20 @@ define amdgpu_kernel void @flat_wavefront_release_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_release_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -4798,6 +5081,20 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_acq_rel_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -5036,6 +5333,20 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_seq_cst_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -5274,6 +5585,20 @@ define amdgpu_kernel void @flat_wavefront_monotonic_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_monotonic_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -5512,6 +5837,20 @@ define amdgpu_kernel void @flat_wavefront_acquire_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_acquire_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -5750,6 +6089,20 @@ define amdgpu_kernel void @flat_wavefront_release_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_release_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -5988,6 +6341,20 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_acq_rel_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -6226,6 +6593,20 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_seq_cst_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -6508,6 +6889,22 @@ define amdgpu_kernel void @flat_wavefront_monotonic_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_monotonic_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -6792,6 +7189,22 @@ define amdgpu_kernel void @flat_wavefront_acquire_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_acquire_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -7076,6 +7489,22 @@ define amdgpu_kernel void @flat_wavefront_release_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_release_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -7360,6 +7789,22 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_acq_rel_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -7644,6 +8089,22 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_seq_cst_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -7928,6 +8389,22 @@ define amdgpu_kernel void @flat_wavefront_monotonic_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_monotonic_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -8212,6 +8689,22 @@ define amdgpu_kernel void @flat_wavefront_acquire_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_acquire_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -8496,6 +8989,22 @@ define amdgpu_kernel void @flat_wavefront_release_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_release_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -8780,6 +9289,22 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_acq_rel_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -9064,6 +9589,22 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_seq_cst_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -9348,6 +9889,22 @@ define amdgpu_kernel void @flat_wavefront_monotonic_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_monotonic_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -9632,6 +10189,22 @@ define amdgpu_kernel void @flat_wavefront_acquire_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_acquire_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -9916,6 +10489,22 @@ define amdgpu_kernel void @flat_wavefront_release_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_release_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -10200,6 +10789,22 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_acq_rel_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -10484,6 +11089,22 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_seq_cst_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -10668,6 +11289,17 @@ define amdgpu_kernel void @flat_wavefront_one_as_unordered_load(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_one_as_unordered_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3]
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in syncscope("wavefront-one-as") unordered, align 4
@@ -10850,6 +11482,17 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_load(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_one_as_monotonic_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3]
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in syncscope("wavefront-one-as") monotonic, align 4
@@ -11032,6 +11675,17 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_load(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_one_as_acquire_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3]
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in syncscope("wavefront-one-as") acquire, align 4
@@ -11214,6 +11868,17 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_load(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_one_as_seq_cst_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3]
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in syncscope("wavefront-one-as") seq_cst, align 4
@@ -11364,6 +12029,16 @@ define amdgpu_kernel void @flat_wavefront_one_as_unordered_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_one_as_unordered_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out syncscope("wavefront-one-as") unordered, align 4
@@ -11513,6 +12188,16 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_one_as_monotonic_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out syncscope("wavefront-one-as") monotonic, align 4
@@ -11662,6 +12347,16 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_one_as_release_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out syncscope("wavefront-one-as") release, align 4
@@ -11811,6 +12506,16 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_one_as_seq_cst_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out syncscope("wavefront-one-as") seq_cst, align 4
@@ -11960,6 +12665,16 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_one_as_monotonic_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("wavefront-one-as") monotonic
@@ -12109,6 +12824,16 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_one_as_acquire_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("wavefront-one-as") acquire
@@ -12258,6 +12983,16 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_one_as_release_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("wavefront-one-as") release
@@ -12407,6 +13142,16 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_one_as_acq_rel_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("wavefront-one-as") acq_rel
@@ -12556,6 +13301,16 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_one_as_seq_cst_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("wavefront-one-as") seq_cst
@@ -12749,6 +13504,18 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_ret_atomicrmw(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_one_as_acquire_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("wavefront-one-as") acquire
@@ -12943,6 +13710,18 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_ret_atomicrmw(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_one_as_acq_rel_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("wavefront-one-as") acq_rel
@@ -13137,6 +13916,18 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_ret_atomicrmw(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_one_as_seq_cst_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("wavefront-one-as") seq_cst
@@ -13375,6 +14166,20 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_one_as_monotonic_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -13613,6 +14418,20 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_one_as_acquire_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -13851,6 +14670,20 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_one_as_release_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -14089,6 +14922,20 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_one_as_acq_rel_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -14327,6 +15174,20 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_one_as_seq_cst_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -14565,6 +15426,20 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_one_as_monotonic_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -14803,6 +15678,20 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_one_as_acquire_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -15041,6 +15930,20 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_one_as_release_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -15279,6 +16182,20 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_one_as_acq_rel_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -15517,6 +16434,20 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_one_as_seq_cst_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -15755,6 +16686,20 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_one_as_monotonic_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -15993,6 +16938,20 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_one_as_acquire_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -16231,6 +17190,20 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_one_as_release_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -16469,6 +17442,20 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_one_as_acq_rel_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -16707,6 +17694,20 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_one_as_seq_cst_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -16989,6 +17990,22 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_monotonic_ret_cmpxchg
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_one_as_monotonic_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -17273,6 +18290,22 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_one_as_acquire_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -17557,6 +18590,22 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -17841,6 +18890,22 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -18125,6 +19190,22 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_one_as_monotonic_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -18409,6 +19490,22 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_one_as_acquire_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -18693,6 +19790,22 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_one_as_release_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -18977,6 +20090,22 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_one_as_acq_rel_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -19261,6 +20390,22 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_one_as_seq_cst_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -19545,6 +20690,22 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -19829,6 +20990,22 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_one_as_acquire_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -20113,6 +21290,22 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_one_as_release_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -20397,6 +21590,22 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_relc_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_one_as_acq_relc_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -20681,6 +21890,22 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-workgroup.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-workgroup.ll
index 0fd4aa4a7a93f..18e6812ded962 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-workgroup.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-workgroup.ll
@@ -11,6 +11,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250-CU %s
define amdgpu_kernel void @flat_workgroup_unordered_load(
; GFX7-LABEL: flat_workgroup_unordered_load:
@@ -187,6 +188,17 @@ define amdgpu_kernel void @flat_workgroup_unordered_load(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_unordered_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3]
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in syncscope("workgroup") unordered, align 4
@@ -369,6 +381,17 @@ define amdgpu_kernel void @flat_workgroup_monotonic_load(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_monotonic_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3]
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in syncscope("workgroup") monotonic, align 4
@@ -563,6 +586,17 @@ define amdgpu_kernel void @flat_workgroup_acquire_load(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_acquire_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3]
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in syncscope("workgroup") acquire, align 4
@@ -776,6 +810,18 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_load(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_seq_cst_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3]
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in syncscope("workgroup") seq_cst, align 4
@@ -926,6 +972,16 @@ define amdgpu_kernel void @flat_workgroup_unordered_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_unordered_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out syncscope("workgroup") unordered, align 4
@@ -1075,6 +1131,16 @@ define amdgpu_kernel void @flat_workgroup_monotonic_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_monotonic_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out syncscope("workgroup") monotonic, align 4
@@ -1241,6 +1307,17 @@ define amdgpu_kernel void @flat_workgroup_release_store(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_release_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out syncscope("workgroup") release, align 4
@@ -1407,6 +1484,17 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_store(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_seq_cst_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out syncscope("workgroup") seq_cst, align 4
@@ -1556,6 +1644,16 @@ define amdgpu_kernel void @flat_workgroup_monotonic_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_monotonic_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("workgroup") monotonic
@@ -1724,6 +1822,17 @@ define amdgpu_kernel void @flat_workgroup_acquire_atomicrmw(
; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_acquire_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("workgroup") acquire
@@ -1890,6 +1999,17 @@ define amdgpu_kernel void @flat_workgroup_release_atomicrmw(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_release_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("workgroup") release
@@ -2075,6 +2195,18 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_atomicrmw(
; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_acq_rel_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("workgroup") acq_rel
@@ -2260,6 +2392,18 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_atomicrmw(
; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_seq_cst_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("workgroup") seq_cst
@@ -2465,6 +2609,18 @@ define amdgpu_kernel void @flat_workgroup_acquire_ret_atomicrmw(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_acquire_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("workgroup") acquire
@@ -2690,6 +2846,19 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_ret_atomicrmw(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_acq_rel_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("workgroup") acq_rel
@@ -2915,6 +3084,19 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_ret_atomicrmw(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_seq_cst_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("workgroup") seq_cst
@@ -3153,6 +3335,20 @@ define amdgpu_kernel void @flat_workgroup_monotonic_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_monotonic_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -3410,6 +3606,21 @@ define amdgpu_kernel void @flat_workgroup_acquire_monotonic_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_acquire_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -3665,6 +3876,21 @@ define amdgpu_kernel void @flat_workgroup_release_monotonic_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_release_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -3939,6 +4165,22 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_monotonic_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_acq_rel_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -4213,6 +4455,22 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_monotonic_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_seq_cst_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -4470,6 +4728,21 @@ define amdgpu_kernel void @flat_workgroup_monotonic_acquire_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_monotonic_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -4727,6 +5000,21 @@ define amdgpu_kernel void @flat_workgroup_acquire_acquire_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_acquire_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -5001,6 +5289,22 @@ define amdgpu_kernel void @flat_workgroup_release_acquire_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_release_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -5275,6 +5579,22 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_acquire_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_acq_rel_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -5549,6 +5869,22 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_acquire_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_seq_cst_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -5823,6 +6159,22 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_seq_cst_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_seq_cst_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -6105,6 +6457,22 @@ define amdgpu_kernel void @flat_workgroup_monotonic_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_monotonic_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -6401,6 +6769,22 @@ define amdgpu_kernel void @flat_workgroup_acquire_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_acquire_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -6702,6 +7086,23 @@ define amdgpu_kernel void @flat_workgroup_release_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_release_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -7017,6 +7418,23 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_acq_rel_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -7332,6 +7750,23 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_seq_cst_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -7630,6 +8065,22 @@ define amdgpu_kernel void @flat_workgroup_monotonic_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_monotonic_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -7926,6 +8377,22 @@ define amdgpu_kernel void @flat_workgroup_acquire_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_acquire_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -8241,6 +8708,23 @@ define amdgpu_kernel void @flat_workgroup_release_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_release_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -8556,6 +9040,23 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_acq_rel_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -8871,6 +9372,23 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_seq_cst_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -9186,6 +9704,23 @@ define amdgpu_kernel void @flat_workgroup_monotonic_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_monotonic_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -9499,6 +10034,23 @@ define amdgpu_kernel void @flat_workgroup_acquire_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_acquire_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -9814,6 +10366,23 @@ define amdgpu_kernel void @flat_workgroup_release_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_release_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -10129,6 +10698,23 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_acq_rel_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -10444,6 +11030,23 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_seq_cst_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -10628,6 +11231,17 @@ define amdgpu_kernel void @flat_workgroup_one_as_unordered_load(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_one_as_unordered_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3]
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in syncscope("workgroup-one-as") unordered, align 4
@@ -10810,6 +11424,17 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_load(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_one_as_monotonic_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3]
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in syncscope("workgroup-one-as") monotonic, align 4
@@ -11000,6 +11625,17 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_load(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_one_as_acquire_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3]
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in syncscope("workgroup-one-as") acquire, align 4
@@ -11202,6 +11838,17 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_load(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_one_as_seq_cst_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3]
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in syncscope("workgroup-one-as") seq_cst, align 4
@@ -11352,6 +11999,16 @@ define amdgpu_kernel void @flat_workgroup_one_as_unordered_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_one_as_unordered_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out syncscope("workgroup-one-as") unordered, align 4
@@ -11501,6 +12158,16 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_one_as_monotonic_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out syncscope("workgroup-one-as") monotonic, align 4
@@ -11660,6 +12327,16 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_one_as_release_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out syncscope("workgroup-one-as") release, align 4
@@ -11819,6 +12496,16 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_one_as_seq_cst_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out syncscope("workgroup-one-as") seq_cst, align 4
@@ -11968,6 +12655,16 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_one_as_monotonic_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("workgroup-one-as") monotonic
@@ -12127,6 +12824,16 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_one_as_acquire_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("workgroup-one-as") acquire
@@ -12286,6 +12993,16 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_one_as_release_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("workgroup-one-as") release
@@ -12455,6 +13172,16 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_one_as_acq_rel_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("workgroup-one-as") acq_rel
@@ -12624,6 +13351,16 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_one_as_seq_cst_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("workgroup-one-as") seq_cst
@@ -12825,6 +13562,18 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_ret_atomicrmw(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_one_as_acquire_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("workgroup-one-as") acquire
@@ -13039,6 +13788,18 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_ret_atomicrmw(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_one_as_acq_rel_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("workgroup-one-as") acq_rel
@@ -13253,6 +14014,18 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_ret_atomicrmw(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_one_as_seq_cst_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("workgroup-one-as") seq_cst
@@ -13491,6 +14264,20 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_one_as_monotonic_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -13739,6 +14526,20 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_one_as_acquire_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -13987,6 +14788,20 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_one_as_release_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -14245,6 +15060,20 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_one_as_acq_rel_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -14503,6 +15332,20 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_one_as_seq_cst_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -14751,6 +15594,20 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_one_as_monotonic_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -14999,6 +15856,20 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_one_as_acquire_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -15257,6 +16128,20 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_one_as_release_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -15515,6 +16400,20 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_one_as_acq_rel_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -15773,6 +16672,20 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_one_as_seq_cst_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -16031,6 +16944,20 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_one_as_monotonic_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -16289,6 +17216,20 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_one_as_acquire_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -16547,6 +17488,20 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_one_as_release_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -16805,6 +17760,20 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_one_as_acq_rel_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -17063,6 +18032,20 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_one_as_seq_cst_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -17345,6 +18328,22 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonicmonotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_one_as_monotonicmonotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -17637,6 +18636,22 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_one_as_acquire_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -17931,6 +18946,22 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_one_as_release_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -18235,6 +19266,22 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -18539,6 +19586,22 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -18833,6 +19896,22 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_one_as_monotonic_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -19125,6 +20204,22 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_one_as_acquire_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -19429,6 +20524,22 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_one_as_release_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -19733,6 +20844,22 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_one_as_acq_rel_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -20037,6 +21164,22 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_one_as_seq_cst_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -20341,6 +21484,22 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_one_as_monotonic_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -20643,6 +21802,22 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_one_as_acquire_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -20947,6 +22122,22 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_one_as_release_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -21251,6 +22442,22 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_one_as_acq_rel_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -21555,6 +22762,22 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: flat_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-CU-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-agent.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-agent.ll
index 74a72e04fa4ae..51859c112bf9f 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-agent.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-agent.ll
@@ -12,6 +12,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250-CU %s
define amdgpu_kernel void @global_agent_unordered_load(
; GFX6-LABEL: global_agent_unordered_load:
@@ -190,6 +191,17 @@ define amdgpu_kernel void @global_agent_unordered_load(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_unordered_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in syncscope("agent") unordered, align 4
@@ -374,6 +386,17 @@ define amdgpu_kernel void @global_agent_monotonic_load(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_monotonic_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in syncscope("agent") monotonic, align 4
@@ -574,6 +597,18 @@ define amdgpu_kernel void @global_agent_acquire_load(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_acquire_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in syncscope("agent") acquire, align 4
@@ -793,6 +828,24 @@ define amdgpu_kernel void @global_agent_seq_cst_load(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_seq_cst_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in syncscope("agent") seq_cst, align 4
@@ -950,6 +1003,16 @@ define amdgpu_kernel void @global_agent_unordered_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_unordered_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out syncscope("agent") unordered, align 4
@@ -1106,6 +1169,16 @@ define amdgpu_kernel void @global_agent_monotonic_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_monotonic_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out syncscope("agent") monotonic, align 4
@@ -1287,6 +1360,20 @@ define amdgpu_kernel void @global_agent_release_store(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_release_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out syncscope("agent") release, align 4
@@ -1468,6 +1555,20 @@ define amdgpu_kernel void @global_agent_seq_cst_store(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_seq_cst_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out syncscope("agent") seq_cst, align 4
@@ -1622,6 +1723,16 @@ define amdgpu_kernel void @global_agent_monotonic_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_monotonic_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent") monotonic
@@ -1805,6 +1916,18 @@ define amdgpu_kernel void @global_agent_acquire_atomicrmw(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_acquire_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent") acquire
@@ -1984,6 +2107,20 @@ define amdgpu_kernel void @global_agent_release_atomicrmw(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_release_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent") release
@@ -2192,6 +2329,22 @@ define amdgpu_kernel void @global_agent_acq_rel_atomicrmw(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_acq_rel_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent") acq_rel
@@ -2400,6 +2553,22 @@ define amdgpu_kernel void @global_agent_seq_cst_atomicrmw(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_seq_cst_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent") seq_cst
@@ -2598,6 +2767,19 @@ define amdgpu_kernel void @global_agent_acquire_ret_atomicrmw(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_acquire_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent") acquire
@@ -2826,6 +3008,25 @@ define amdgpu_kernel void @global_agent_acq_rel_ret_atomicrmw(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_acq_rel_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent") acq_rel
@@ -3054,6 +3255,25 @@ define amdgpu_kernel void @global_agent_seq_cst_ret_atomicrmw(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_seq_cst_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent") seq_cst
@@ -3273,6 +3493,20 @@ define amdgpu_kernel void @global_agent_monotonic_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_monotonic_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -3521,6 +3755,22 @@ define amdgpu_kernel void @global_agent_acquire_monotonic_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_acquire_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -3765,6 +4015,24 @@ define amdgpu_kernel void @global_agent_release_monotonic_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_release_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -4038,6 +4306,26 @@ define amdgpu_kernel void @global_agent_acq_rel_monotonic_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_acq_rel_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -4311,6 +4599,26 @@ define amdgpu_kernel void @global_agent_seq_cst_monotonic_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_seq_cst_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -4559,6 +4867,22 @@ define amdgpu_kernel void @global_agent_monotonic_acquire_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_monotonic_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -4807,6 +5131,22 @@ define amdgpu_kernel void @global_agent_acquire_acquire_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_acquire_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -5080,6 +5420,26 @@ define amdgpu_kernel void @global_agent_release_acquire_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_release_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -5353,6 +5713,26 @@ define amdgpu_kernel void @global_agent_acq_rel_acquire_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_acq_rel_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -5626,6 +6006,26 @@ define amdgpu_kernel void @global_agent_seq_cst_acquire_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_seq_cst_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -5899,6 +6299,26 @@ define amdgpu_kernel void @global_agent_monotonic_seq_cst_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_monotonic_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -6172,6 +6592,26 @@ define amdgpu_kernel void @global_agent_acquire_seq_cst_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_acquire_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -6445,6 +6885,26 @@ define amdgpu_kernel void @global_agent_release_seq_cst_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_release_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -6718,6 +7178,26 @@ define amdgpu_kernel void @global_agent_acq_rel_seq_cst_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_acq_rel_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -6991,6 +7471,26 @@ define amdgpu_kernel void @global_agent_seq_cst_seq_cst_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_seq_cst_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -7240,6 +7740,22 @@ define amdgpu_kernel void @global_agent_monotonic_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_monotonic_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -7507,6 +8023,23 @@ define amdgpu_kernel void @global_agent_acquire_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_acquire_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -7783,6 +8316,26 @@ define amdgpu_kernel void @global_agent_release_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_release_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -8079,6 +8632,29 @@ define amdgpu_kernel void @global_agent_acq_rel_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_acq_rel_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -8375,6 +8951,29 @@ define amdgpu_kernel void @global_agent_seq_cst_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_seq_cst_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -8646,6 +9245,25 @@ define amdgpu_kernel void @global_agent_monotonic_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_monotonic_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -8913,6 +9531,23 @@ define amdgpu_kernel void @global_agent_acquire_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_acquire_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -9209,6 +9844,29 @@ define amdgpu_kernel void @global_agent_release_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_release_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -9505,6 +10163,29 @@ define amdgpu_kernel void @global_agent_acq_rel_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_acq_rel_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -9801,6 +10482,29 @@ define amdgpu_kernel void @global_agent_seq_cst_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_seq_cst_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -10097,6 +10801,29 @@ define amdgpu_kernel void @global_agent_monotonic_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_monotonic_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -10389,6 +11116,27 @@ define amdgpu_kernel void @global_agent_acquire_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_acquire_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -10685,6 +11433,29 @@ define amdgpu_kernel void @global_agent_release_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_release_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -10981,6 +11752,29 @@ define amdgpu_kernel void @global_agent_acq_rel_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_acq_rel_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -11277,6 +12071,29 @@ define amdgpu_kernel void @global_agent_seq_cst_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_seq_cst_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -11463,6 +12280,17 @@ define amdgpu_kernel void @global_agent_one_as_unordered_load(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_one_as_unordered_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in syncscope("agent-one-as") unordered, align 4
@@ -11647,6 +12475,17 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_load(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_one_as_monotonic_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in syncscope("agent-one-as") monotonic, align 4
@@ -11847,6 +12686,18 @@ define amdgpu_kernel void @global_agent_one_as_acquire_load(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_one_as_acquire_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in syncscope("agent-one-as") acquire, align 4
@@ -12066,6 +12917,24 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_load(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_one_as_seq_cst_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in syncscope("agent-one-as") seq_cst, align 4
@@ -12223,6 +13092,16 @@ define amdgpu_kernel void @global_agent_one_as_unordered_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_one_as_unordered_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out syncscope("agent-one-as") unordered, align 4
@@ -12379,6 +13258,16 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_one_as_monotonic_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out syncscope("agent-one-as") monotonic, align 4
@@ -12560,6 +13449,20 @@ define amdgpu_kernel void @global_agent_one_as_release_store(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_one_as_release_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out syncscope("agent-one-as") release, align 4
@@ -12741,6 +13644,20 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_store(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_one_as_seq_cst_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out syncscope("agent-one-as") seq_cst, align 4
@@ -12895,6 +13812,16 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_one_as_monotonic_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent-one-as") monotonic
@@ -13078,6 +14005,18 @@ define amdgpu_kernel void @global_agent_one_as_acquire_atomicrmw(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_one_as_acquire_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent-one-as") acquire
@@ -13257,6 +14196,20 @@ define amdgpu_kernel void @global_agent_one_as_release_atomicrmw(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_one_as_release_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent-one-as") release
@@ -13465,6 +14418,22 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_atomicrmw(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_one_as_acq_rel_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent-one-as") acq_rel
@@ -13673,6 +14642,22 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_atomicrmw(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_one_as_seq_cst_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent-one-as") seq_cst
@@ -13871,6 +14856,19 @@ define amdgpu_kernel void @global_agent_one_as_acquire_ret_atomicrmw(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_one_as_acquire_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent-one-as") acquire
@@ -14099,6 +15097,25 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_ret_atomicrmw(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_one_as_acq_rel_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent-one-as") acq_rel
@@ -14327,6 +15344,25 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_ret_atomicrmw(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_one_as_seq_cst_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent-one-as") seq_cst
@@ -14546,6 +15582,20 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_one_as_monotonic_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -14794,6 +15844,22 @@ define amdgpu_kernel void @global_agent_one_as_acquire_monotonic_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_one_as_acquire_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -15038,6 +16104,24 @@ define amdgpu_kernel void @global_agent_one_as_release_monotonic_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_one_as_release_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -15311,6 +16395,26 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_monotonic_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_one_as_acq_rel_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -15584,6 +16688,26 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_monotonic_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_one_as_seq_cst_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -15832,6 +16956,22 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_acquire_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_one_as_monotonic_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -16080,6 +17220,22 @@ define amdgpu_kernel void @global_agent_one_as_acquire_acquire_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_one_as_acquire_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -16353,6 +17509,26 @@ define amdgpu_kernel void @global_agent_one_as_release_acquire_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_one_as_release_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -16626,6 +17802,26 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_acquire_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_one_as_acq_rel_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -16899,6 +18095,26 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_acquire_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_one_as_seq_cst_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -17172,6 +18388,26 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_seq_cst_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_one_as_monotonic_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -17445,6 +18681,26 @@ define amdgpu_kernel void @global_agent_one_as_acquire_seq_cst_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_one_as_acquire_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -17718,6 +18974,26 @@ define amdgpu_kernel void @global_agent_one_as_release_seq_cst_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_one_as_release_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -17991,6 +19267,26 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_seq_cst_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_one_as_acq_rel_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -18264,6 +19560,26 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_seq_cst_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_one_as_seq_cst_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -18513,6 +19829,22 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_one_as_monotonic_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -18780,6 +20112,23 @@ define amdgpu_kernel void @global_agent_one_as_acquire_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_one_as_acquire_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -19076,6 +20425,29 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_one_as_acq_rel_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -19372,6 +20744,29 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_one_as_seq_cst_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -19643,6 +21038,25 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_one_as_monotonic_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -19910,6 +21324,23 @@ define amdgpu_kernel void @global_agent_one_as_acquire_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_one_as_acquire_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -20206,6 +21637,29 @@ define amdgpu_kernel void @global_agent_one_as_release_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_one_as_release_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -20502,6 +21956,29 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_one_as_acq_rel_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -20798,6 +22275,29 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_one_as_seq_cst_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -21094,6 +22594,29 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_one_as_monotonic_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -21386,6 +22909,27 @@ define amdgpu_kernel void @global_agent_one_as_acquire_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_one_as_acquire_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -21682,6 +23226,29 @@ define amdgpu_kernel void @global_agent_one_as_release_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_one_as_release_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -21978,6 +23545,29 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_one_as_acq_rel_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -22274,6 +23864,29 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_agent_one_as_seq_cst_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-lastuse.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-lastuse.ll
index 5f952b98041f3..2c8fa9da74862 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-lastuse.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-lastuse.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefix=GFX12 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefix=GFX12 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250-CU %s
define amdgpu_kernel void @global_last_use_load_0(ptr addrspace(1) %in, ptr addrspace(1) %out) {
; GFX12-LABEL: global_last_use_load_0:
@@ -14,6 +15,18 @@ define amdgpu_kernel void @global_last_use_load_0(ptr addrspace(1) %in, ptr addr
; GFX12-NEXT: v_mov_b32_e32 v1, s2
; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_last_use_load_0:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[2:3], 0x0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
entry:
%val = load i32, ptr addrspace(1) %in, align 4, !amdgpu.last.use !{}
store i32 %val, ptr addrspace(1) %out
@@ -37,6 +50,21 @@ define amdgpu_kernel void @global_last_use_load_1(ptr addrspace(1) %in, ptr addr
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_last_use_load_1:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, v0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_xcnt 0x0
+; GFX1250-CU-NEXT: s_mov_b32 s4, 0x3ff
+; GFX1250-CU-NEXT: v_and_b32_e64 v1, v1, s4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: global_load_b32 v1, v1, s[2:3] scale_offset th:TH_LOAD_LU
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%val.gep = getelementptr inbounds i32, ptr addrspace(1) %in, i32 %tid
@@ -58,6 +86,19 @@ define amdgpu_kernel void @global_last_use_and_volatile_load(ptr addrspace(1) %i
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_last_use_and_volatile_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3] th:TH_LOAD_BYPASS scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
entry:
%val = load volatile i32, ptr addrspace(1) %in, align 4, !amdgpu.last.use !{}
store i32 %val, ptr addrspace(1) %out
@@ -81,6 +122,21 @@ define amdgpu_kernel void @global_last_use_and_nontemporal_load(ptr addrspace(1)
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_last_use_and_nontemporal_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, v0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_xcnt 0x0
+; GFX1250-CU-NEXT: s_mov_b32 s4, 0x3ff
+; GFX1250-CU-NEXT: v_and_b32_e64 v1, v1, s4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: global_load_b32 v1, v1, s[2:3] scale_offset th:TH_LOAD_LU
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%val.gep = getelementptr inbounds i32, ptr addrspace(1) %in, i32 %tid
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-nontemporal.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-nontemporal.ll
index 16e55058e4fc8..e73300dbc5ac6 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-nontemporal.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-nontemporal.ll
@@ -12,6 +12,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250-CU %s
define amdgpu_kernel void @global_nontemporal_load_0(
; GFX6-LABEL: global_nontemporal_load_0:
@@ -189,6 +190,18 @@ define amdgpu_kernel void @global_nontemporal_load_0(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_nontemporal_load_0:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[2:3], 0x0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load i32, ptr addrspace(1) %in, align 4, !nontemporal !0
@@ -448,6 +461,21 @@ define amdgpu_kernel void @global_nontemporal_load_1(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_nontemporal_load_1:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, v0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_xcnt 0x0
+; GFX1250-CU-NEXT: s_mov_b32 s4, 0x3ff
+; GFX1250-CU-NEXT: v_and_b32_e64 v1, v1, s4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: global_load_b32 v1, v1, s[2:3] scale_offset th:TH_LOAD_NT
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -633,6 +661,18 @@ define amdgpu_kernel void @global_nontemporal_store_0(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] th:TH_STORE_NT
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_nontemporal_store_0:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[2:3], 0x0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1] th:TH_STORE_NT
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load i32, ptr addrspace(1) %in, align 4
@@ -866,6 +906,20 @@ define amdgpu_kernel void @global_nontemporal_store_1(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] th:TH_STORE_NT
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_nontemporal_store_1:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[2:3], 0x0
+; GFX1250-CU-NEXT: s_wait_xcnt 0x0
+; GFX1250-CU-NEXT: s_mov_b32 s3, 0x3ff
+; GFX1250-CU-NEXT: v_and_b32_e64 v0, v0, s3
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1] scale_offset th:TH_STORE_NT
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -1056,6 +1110,19 @@ define amdgpu_kernel void @global_nontemporal_volatile_load(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_nontemporal_volatile_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3] th:TH_LOAD_NT scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load volatile i32, ptr addrspace(1) %in, align 4, !nontemporal !0
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-singlethread.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-singlethread.ll
index 8042d38716107..2633bba70ddd3 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-singlethread.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-singlethread.ll
@@ -12,6 +12,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250-CU %s
define amdgpu_kernel void @global_singlethread_unordered_load(
; GFX6-LABEL: global_singlethread_unordered_load:
@@ -190,6 +191,17 @@ define amdgpu_kernel void @global_singlethread_unordered_load(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_unordered_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in syncscope("singlethread") unordered, align 4
@@ -374,6 +386,17 @@ define amdgpu_kernel void @global_singlethread_monotonic_load(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_monotonic_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in syncscope("singlethread") monotonic, align 4
@@ -558,6 +581,17 @@ define amdgpu_kernel void @global_singlethread_acquire_load(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_acquire_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in syncscope("singlethread") acquire, align 4
@@ -742,6 +776,17 @@ define amdgpu_kernel void @global_singlethread_seq_cst_load(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_seq_cst_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in syncscope("singlethread") seq_cst, align 4
@@ -899,6 +944,16 @@ define amdgpu_kernel void @global_singlethread_unordered_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_unordered_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out syncscope("singlethread") unordered, align 4
@@ -1055,6 +1110,16 @@ define amdgpu_kernel void @global_singlethread_monotonic_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_monotonic_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out syncscope("singlethread") monotonic, align 4
@@ -1211,6 +1276,16 @@ define amdgpu_kernel void @global_singlethread_release_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_release_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out syncscope("singlethread") release, align 4
@@ -1367,6 +1442,16 @@ define amdgpu_kernel void @global_singlethread_seq_cst_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_seq_cst_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out syncscope("singlethread") seq_cst, align 4
@@ -1521,6 +1606,16 @@ define amdgpu_kernel void @global_singlethread_monotonic_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_monotonic_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("singlethread") monotonic
@@ -1675,6 +1770,16 @@ define amdgpu_kernel void @global_singlethread_acquire_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_acquire_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("singlethread") acquire
@@ -1829,6 +1934,16 @@ define amdgpu_kernel void @global_singlethread_release_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_release_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("singlethread") release
@@ -1983,6 +2098,16 @@ define amdgpu_kernel void @global_singlethread_acq_rel_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_acq_rel_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("singlethread") acq_rel
@@ -2137,6 +2262,16 @@ define amdgpu_kernel void @global_singlethread_seq_cst_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_seq_cst_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("singlethread") seq_cst
@@ -2319,6 +2454,18 @@ define amdgpu_kernel void @global_singlethread_acquire_ret_atomicrmw(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_acquire_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("singlethread") acquire
@@ -2502,6 +2649,18 @@ define amdgpu_kernel void @global_singlethread_acq_rel_ret_atomicrmw(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_acq_rel_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("singlethread") acq_rel
@@ -2685,6 +2844,18 @@ define amdgpu_kernel void @global_singlethread_seq_cst_ret_atomicrmw(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_seq_cst_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("singlethread") seq_cst
@@ -2904,6 +3075,20 @@ define amdgpu_kernel void @global_singlethread_monotonic_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_monotonic_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -3123,6 +3308,20 @@ define amdgpu_kernel void @global_singlethread_acquire_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_acquire_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -3342,6 +3541,20 @@ define amdgpu_kernel void @global_singlethread_release_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_release_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -3561,6 +3774,20 @@ define amdgpu_kernel void @global_singlethread_acq_rel_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_acq_rel_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -3780,6 +4007,20 @@ define amdgpu_kernel void @global_singlethread_seq_cst_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_seq_cst_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -3999,6 +4240,20 @@ define amdgpu_kernel void @global_singlethread_monotonic_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_monotonic_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -4218,6 +4473,20 @@ define amdgpu_kernel void @global_singlethread_acquire_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_acquire_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -4437,6 +4706,20 @@ define amdgpu_kernel void @global_singlethread_release_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_release_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -4656,6 +4939,20 @@ define amdgpu_kernel void @global_singlethread_acq_rel_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_acq_rel_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -4875,6 +5172,20 @@ define amdgpu_kernel void @global_singlethread_seq_cst_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_seq_cst_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -5094,6 +5405,20 @@ define amdgpu_kernel void @global_singlethread_monotonic_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_monotonic_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -5313,6 +5638,20 @@ define amdgpu_kernel void @global_singlethread_acquire_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_acquire_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -5532,6 +5871,20 @@ define amdgpu_kernel void @global_singlethread_release_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_release_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -5751,6 +6104,20 @@ define amdgpu_kernel void @global_singlethread_acq_rel_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_acq_rel_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -5970,6 +6337,20 @@ define amdgpu_kernel void @global_singlethread_seq_cst_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_seq_cst_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -6219,6 +6600,22 @@ define amdgpu_kernel void @global_singlethread_monotonic_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_monotonic_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -6470,6 +6867,22 @@ define amdgpu_kernel void @global_singlethread_acquire_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_acquire_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -6721,6 +7134,22 @@ define amdgpu_kernel void @global_singlethread_release_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_release_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -6972,6 +7401,22 @@ define amdgpu_kernel void @global_singlethread_acq_rel_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_acq_rel_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -7223,6 +7668,22 @@ define amdgpu_kernel void @global_singlethread_seq_cst_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_seq_cst_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -7474,6 +7935,22 @@ define amdgpu_kernel void @global_singlethread_monotonic_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_monotonic_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -7725,6 +8202,22 @@ define amdgpu_kernel void @global_singlethread_acquire_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_acquire_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -7976,6 +8469,22 @@ define amdgpu_kernel void @global_singlethread_release_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_release_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -8227,6 +8736,22 @@ define amdgpu_kernel void @global_singlethread_acq_rel_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_acq_rel_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -8478,6 +9003,22 @@ define amdgpu_kernel void @global_singlethread_seq_cst_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_seq_cst_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -8729,6 +9270,22 @@ define amdgpu_kernel void @global_singlethread_monotonic_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_monotonic_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -8980,6 +9537,22 @@ define amdgpu_kernel void @global_singlethread_acquire_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_acquire_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -9231,6 +9804,22 @@ define amdgpu_kernel void @global_singlethread_release_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_release_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -9482,6 +10071,22 @@ define amdgpu_kernel void @global_singlethread_acq_rel_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_acq_rel_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -9733,6 +10338,22 @@ define amdgpu_kernel void @global_singlethread_seq_cst_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_seq_cst_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -9919,6 +10540,17 @@ define amdgpu_kernel void @global_singlethread_one_as_unordered_load(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_one_as_unordered_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in syncscope("singlethread-one-as") unordered, align 4
@@ -10103,6 +10735,17 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_load(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_one_as_monotonic_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in syncscope("singlethread-one-as") monotonic, align 4
@@ -10287,6 +10930,17 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_load(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_one_as_acquire_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in syncscope("singlethread-one-as") acquire, align 4
@@ -10471,6 +11125,17 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_load(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_one_as_seq_cst_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in syncscope("singlethread-one-as") seq_cst, align 4
@@ -10628,6 +11293,16 @@ define amdgpu_kernel void @global_singlethread_one_as_unordered_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_one_as_unordered_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out syncscope("singlethread-one-as") unordered, align 4
@@ -10784,6 +11459,16 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_one_as_monotonic_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out syncscope("singlethread-one-as") monotonic, align 4
@@ -10940,6 +11625,16 @@ define amdgpu_kernel void @global_singlethread_one_as_release_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_one_as_release_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out syncscope("singlethread-one-as") release, align 4
@@ -11096,6 +11791,16 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_one_as_seq_cst_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out syncscope("singlethread-one-as") seq_cst, align 4
@@ -11250,6 +11955,16 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_one_as_monotonic_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("singlethread-one-as") monotonic
@@ -11404,6 +12119,16 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_one_as_acquire_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("singlethread-one-as") acquire
@@ -11558,6 +12283,16 @@ define amdgpu_kernel void @global_singlethread_one_as_release_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_one_as_release_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("singlethread-one-as") release
@@ -11712,6 +12447,16 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_one_as_acq_rel_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("singlethread-one-as") acq_rel
@@ -11866,6 +12611,16 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_one_as_seq_cst_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("singlethread-one-as") seq_cst
@@ -12048,6 +12803,18 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_ret_atomicrmw(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_one_as_acquire_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("singlethread-one-as") acquire
@@ -12231,6 +12998,18 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_ret_atomicrmw(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_one_as_acq_rel_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("singlethread-one-as") acq_rel
@@ -12414,6 +13193,18 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_ret_atomicrmw(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_one_as_seq_cst_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("singlethread-one-as") seq_cst
@@ -12633,6 +13424,20 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_monotonic_cmpxch
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_one_as_monotonic_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -12852,6 +13657,20 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_one_as_acquire_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -13071,6 +13890,20 @@ define amdgpu_kernel void @global_singlethread_one_as_release_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_one_as_release_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -13290,6 +14123,20 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_one_as_acq_rel_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -13509,6 +14356,20 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_one_as_seq_cst_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -13728,6 +14589,20 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_one_as_monotonic_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -13947,6 +14822,20 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_one_as_acquire_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -14166,6 +15055,20 @@ define amdgpu_kernel void @global_singlethread_one_as_release_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_one_as_release_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -14385,6 +15288,20 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_one_as_acq_rel_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -14604,6 +15521,20 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_one_as_seq_cst_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -14823,6 +15754,20 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_one_as_monotonic_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -15042,6 +15987,20 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_one_as_acquire_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -15261,6 +16220,20 @@ define amdgpu_kernel void @global_singlethread_one_as_release_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_one_as_release_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -15480,6 +16453,20 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_one_as_acq_rel_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -15699,6 +16686,20 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_one_as_seq_cst_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -15948,6 +16949,22 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_monotonic_ret_cm
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_one_as_monotonic_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -16199,6 +17216,22 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_monotonic_ret_cmpx
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_one_as_acquire_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -16450,6 +17483,22 @@ define amdgpu_kernel void @global_singlethread_one_as_release_monotonic_ret_cmpx
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_one_as_release_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -16701,6 +17750,22 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_monotonic_ret_cmpx
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_one_as_acq_rel_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -16952,6 +18017,22 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_monotonic_ret_cmpx
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_one_as_seq_cst_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -17203,6 +18284,22 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_acquire_ret_cmpx
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_one_as_monotonic_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -17454,6 +18551,22 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_acquire_ret_cmpxch
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_one_as_acquire_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -17705,6 +18818,22 @@ define amdgpu_kernel void @global_singlethread_one_as_release_acquire_ret_cmpxch
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_one_as_release_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -17956,6 +19085,22 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_acquire_ret_cmpxch
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_one_as_acq_rel_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -18207,6 +19352,22 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_acquire_ret_cmpxch
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_one_as_seq_cst_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -18458,6 +19619,22 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_seq_cst_ret_cmpx
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_one_as_monotonic_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -18709,6 +19886,22 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_seq_cst_ret_cmpxch
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_one_as_acquire_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -18960,6 +20153,22 @@ define amdgpu_kernel void @global_singlethread_one_as_release_seq_cst_ret_cmpxch
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_one_as_release_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -19211,6 +20420,22 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_seq_cst_ret_cmpxch
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_one_as_acq_rel_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -19462,6 +20687,22 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_seq_cst_ret_cmpxch
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_singlethread_one_as_seq_cst_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-system.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-system.ll
index be148464c156e..c194b49f25255 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-system.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-system.ll
@@ -12,6 +12,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250-CU %s
define amdgpu_kernel void @global_system_unordered_load(
; GFX6-LABEL: global_system_unordered_load:
@@ -190,6 +191,17 @@ define amdgpu_kernel void @global_system_unordered_load(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_unordered_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in unordered, align 4
@@ -374,6 +386,17 @@ define amdgpu_kernel void @global_system_monotonic_load(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_monotonic_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in monotonic, align 4
@@ -576,6 +599,18 @@ define amdgpu_kernel void @global_system_acquire_load(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_acquire_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in acquire, align 4
@@ -797,6 +832,24 @@ define amdgpu_kernel void @global_system_seq_cst_load(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_seq_cst_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in seq_cst, align 4
@@ -954,6 +1007,16 @@ define amdgpu_kernel void @global_system_unordered_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_unordered_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out unordered, align 4
@@ -1110,6 +1173,16 @@ define amdgpu_kernel void @global_system_monotonic_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_monotonic_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out monotonic, align 4
@@ -1295,6 +1368,21 @@ define amdgpu_kernel void @global_system_release_store(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_release_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out release, align 4
@@ -1480,6 +1568,21 @@ define amdgpu_kernel void @global_system_seq_cst_store(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_seq_cst_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out seq_cst, align 4
@@ -1634,6 +1737,16 @@ define amdgpu_kernel void @global_system_monotonic_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_monotonic_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in monotonic
@@ -1819,6 +1932,18 @@ define amdgpu_kernel void @global_system_acquire_atomicrmw(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_acquire_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in acquire
@@ -2002,6 +2127,21 @@ define amdgpu_kernel void @global_system_release_atomicrmw(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_release_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in release
@@ -2216,6 +2356,23 @@ define amdgpu_kernel void @global_system_acq_rel_atomicrmw(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_acq_rel_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in acq_rel
@@ -2430,6 +2587,23 @@ define amdgpu_kernel void @global_system_seq_cst_atomicrmw(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_seq_cst_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in seq_cst
@@ -2630,6 +2804,19 @@ define amdgpu_kernel void @global_system_acquire_ret_atomicrmw(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_acquire_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in acquire
@@ -2864,6 +3051,26 @@ define amdgpu_kernel void @global_system_acq_rel_ret_atomicrmw(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_acq_rel_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in acq_rel
@@ -3098,6 +3305,26 @@ define amdgpu_kernel void @global_system_seq_cst_ret_atomicrmw(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_seq_cst_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in seq_cst
@@ -3317,6 +3544,20 @@ define amdgpu_kernel void @global_system_monotonic_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_monotonic_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -3567,6 +3808,22 @@ define amdgpu_kernel void @global_system_acquire_monotonic_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_acquire_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -3815,6 +4072,25 @@ define amdgpu_kernel void @global_system_release_monotonic_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_release_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -4094,6 +4370,27 @@ define amdgpu_kernel void @global_system_acq_rel_monotonic_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_acq_rel_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -4373,6 +4670,27 @@ define amdgpu_kernel void @global_system_seq_cst_monotonic_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_seq_cst_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -4623,6 +4941,22 @@ define amdgpu_kernel void @global_system_monotonic_acquire_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_monotonic_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -4873,6 +5207,22 @@ define amdgpu_kernel void @global_system_acquire_acquire_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_acquire_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -5152,6 +5502,27 @@ define amdgpu_kernel void @global_system_release_acquire_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_release_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -5431,6 +5802,27 @@ define amdgpu_kernel void @global_system_acq_rel_acquire_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_acq_rel_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -5710,6 +6102,27 @@ define amdgpu_kernel void @global_system_seq_cst_acquire_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_seq_cst_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -5989,6 +6402,27 @@ define amdgpu_kernel void @global_system_seq_cst_seq_cst_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_seq_cst_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -6238,6 +6672,22 @@ define amdgpu_kernel void @global_system_monotonic_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_monotonic_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -6507,6 +6957,23 @@ define amdgpu_kernel void @global_system_acquire_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_acquire_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -6809,6 +7276,30 @@ define amdgpu_kernel void @global_system_acq_rel_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_acq_rel_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -7111,6 +7602,30 @@ define amdgpu_kernel void @global_system_seq_cst_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_seq_cst_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -7384,6 +7899,25 @@ define amdgpu_kernel void @global_system_monotonic_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_monotonic_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -7653,6 +8187,23 @@ define amdgpu_kernel void @global_system_acquire_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_acquire_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -7955,6 +8506,30 @@ define amdgpu_kernel void @global_system_release_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_release_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -8257,6 +8832,30 @@ define amdgpu_kernel void @global_system_acq_rel_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_acq_rel_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -8559,6 +9158,30 @@ define amdgpu_kernel void @global_system_seq_cst_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_seq_cst_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -8861,6 +9484,30 @@ define amdgpu_kernel void @global_system_monotonic_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_monotonic_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -9159,6 +9806,28 @@ define amdgpu_kernel void @global_system_acquire_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_acquire_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -9461,6 +10130,30 @@ define amdgpu_kernel void @global_system_relese_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_relese_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -9763,6 +10456,30 @@ define amdgpu_kernel void @global_system_acq_rel_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_acq_rel_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -10065,6 +10782,30 @@ define amdgpu_kernel void @global_system_seq_cst_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_seq_cst_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -10251,6 +10992,17 @@ define amdgpu_kernel void @global_system_one_as_unordered_load(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_one_as_unordered_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in syncscope("one-as") unordered, align 4
@@ -10435,6 +11187,17 @@ define amdgpu_kernel void @global_system_one_as_monotonic_load(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_one_as_monotonic_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in syncscope("one-as") monotonic, align 4
@@ -10637,6 +11400,18 @@ define amdgpu_kernel void @global_system_one_as_acquire_load(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_one_as_acquire_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in syncscope("one-as") acquire, align 4
@@ -10858,6 +11633,24 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_load(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_one_as_seq_cst_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in syncscope("one-as") seq_cst, align 4
@@ -11015,6 +11808,16 @@ define amdgpu_kernel void @global_system_one_as_unordered_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_one_as_unordered_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out syncscope("one-as") unordered, align 4
@@ -11171,6 +11974,16 @@ define amdgpu_kernel void @global_system_one_as_monotonic_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_one_as_monotonic_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out syncscope("one-as") monotonic, align 4
@@ -11356,6 +12169,21 @@ define amdgpu_kernel void @global_system_one_as_release_store(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_one_as_release_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out syncscope("one-as") release, align 4
@@ -11541,6 +12369,21 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_store(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_one_as_seq_cst_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out syncscope("one-as") seq_cst, align 4
@@ -11695,6 +12538,16 @@ define amdgpu_kernel void @global_system_one_as_monotonic_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_one_as_monotonic_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("one-as") monotonic
@@ -11880,6 +12733,18 @@ define amdgpu_kernel void @global_system_one_as_acquire_atomicrmw(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_one_as_acquire_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("one-as") acquire
@@ -12063,6 +12928,21 @@ define amdgpu_kernel void @global_system_one_as_release_atomicrmw(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_one_as_release_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("one-as") release
@@ -12277,6 +13157,23 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_atomicrmw(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_one_as_acq_rel_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("one-as") acq_rel
@@ -12491,6 +13388,23 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_atomicrmw(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_one_as_seq_cst_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("one-as") seq_cst
@@ -12691,6 +13605,19 @@ define amdgpu_kernel void @global_system_one_as_acquire_ret_atomicrmw(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_one_as_acquire_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("one-as") acquire
@@ -12925,6 +13852,26 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_ret_atomicrmw(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_one_as_acq_rel_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("one-as") acq_rel
@@ -13159,6 +14106,26 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_ret_atomicrmw(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_one_as_seq_cst_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("one-as") seq_cst
@@ -13378,6 +14345,20 @@ define amdgpu_kernel void @global_system_one_as_monotonic_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_one_as_monotonic_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -13628,6 +14609,22 @@ define amdgpu_kernel void @global_system_one_as_acquire_monotonic_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_one_as_acquire_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -13876,6 +14873,25 @@ define amdgpu_kernel void @global_system_one_as_release_monotonic_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_one_as_release_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -14155,6 +15171,27 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_monotonic_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_one_as_acq_rel_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -14434,6 +15471,27 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_monotonic_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_one_as_seq_cst_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -14684,6 +15742,22 @@ define amdgpu_kernel void @global_system_one_as_monotonic_acquire_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_one_as_monotonic_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -14934,6 +16008,22 @@ define amdgpu_kernel void @global_system_one_as_acquire_acquire_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_one_as_acquire_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -15213,6 +16303,27 @@ define amdgpu_kernel void @global_system_one_as_release_acquire_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_one_as_release_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -15492,6 +16603,27 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_acquire_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_one_as_acq_rel_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -15771,6 +16903,27 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_acquire_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_one_as_seq_cst_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -16050,6 +17203,27 @@ define amdgpu_kernel void @global_system_one_as_monotonic_seq_cst_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_one_as_monotonic_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -16329,6 +17503,27 @@ define amdgpu_kernel void @global_system_one_as_acquire_seq_cst_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_one_as_acquire_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -16608,6 +17803,27 @@ define amdgpu_kernel void @global_system_one_as_release_seq_cst_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_one_as_release_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -16887,6 +18103,27 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_seq_cst_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_one_as_acq_rel_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -17166,6 +18403,27 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_seq_cst_cmpxchg(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_one_as_seq_cst_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -17415,6 +18673,22 @@ define amdgpu_kernel void @global_system_one_as_monotonic_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_one_as_monotonic_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -17684,6 +18958,23 @@ define amdgpu_kernel void @global_system_one_as_acquire_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_one_as_acquire_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -17964,6 +19255,27 @@ define amdgpu_kernel void @global_system_one_as_release_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_one_as_release_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -18266,6 +19578,30 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_one_as_acq_rel_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -18568,6 +19904,30 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_one_as_seq_cst_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -18841,6 +20201,25 @@ define amdgpu_kernel void @global_system_one_as_monotonic_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_one_as_monotonic_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -19110,6 +20489,23 @@ define amdgpu_kernel void @global_system_one_as_acquire_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_one_as_acquire_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -19412,6 +20808,30 @@ define amdgpu_kernel void @global_system_one_as_release_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_one_as_release_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -19714,6 +21134,30 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_one_as_acq_rel_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -20016,6 +21460,30 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_one_as_seq_cst_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -20318,6 +21786,30 @@ define amdgpu_kernel void @global_system_one_as_monotonic_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_one_as_monotonic_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -20616,6 +22108,28 @@ define amdgpu_kernel void @global_system_one_as_acquire_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_one_as_acquire_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -20918,6 +22432,30 @@ define amdgpu_kernel void @global_system_one_as_release_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_one_as_release_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -21220,6 +22758,30 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_one_as_acq_rel_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -21522,6 +23084,30 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_system_one_as_seq_cst_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-volatile.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-volatile.ll
index 8a5c5dda9f79c..10d9ee0617a0e 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-volatile.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-volatile.ll
@@ -8,6 +8,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250-CU %s
define amdgpu_kernel void @global_volatile_load_0(
; GFX6-LABEL: global_volatile_load_0:
@@ -146,6 +147,19 @@ define amdgpu_kernel void @global_volatile_load_0(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_volatile_load_0:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load volatile i32, ptr addrspace(1) %in, align 4
@@ -345,6 +359,23 @@ define amdgpu_kernel void @global_volatile_load_1(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_volatile_load_1:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, v0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_xcnt 0x0
+; GFX1250-CU-NEXT: s_mov_b32 s4, 0x3ff
+; GFX1250-CU-NEXT: v_and_b32_e64 v1, v1, s4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: global_load_b32 v1, v1, s[2:3] scale_offset scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -501,6 +532,19 @@ define amdgpu_kernel void @global_volatile_store_0(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_volatile_store_0:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[2:3], 0x0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load i32, ptr addrspace(1) %in, align 4
@@ -693,6 +737,21 @@ define amdgpu_kernel void @global_volatile_store_1(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_volatile_store_1:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[2:3], 0x0
+; GFX1250-CU-NEXT: s_wait_xcnt 0x0
+; GFX1250-CU-NEXT: s_mov_b32 s3, 0x3ff
+; GFX1250-CU-NEXT: v_and_b32_e64 v0, v0, s3
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1] scale_offset scope:SCOPE_SYS
+; GFX1250-CU-NEXT: s_wait_storecnt 0x0
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -838,6 +897,17 @@ define amdgpu_kernel void @global_volatile_workgroup_acquire_load(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_volatile_workgroup_acquire_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic volatile i32, ptr addrspace(1) %in syncscope("workgroup") acquire, align 4
@@ -969,6 +1039,17 @@ define amdgpu_kernel void @global_volatile_workgroup_release_store(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_volatile_workgroup_release_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic volatile i32 %in, ptr addrspace(1) %out syncscope("workgroup") release, align 4
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-wavefront.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-wavefront.ll
index 151ba07a0b531..f64b283edf43f 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-wavefront.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-wavefront.ll
@@ -12,6 +12,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250-CU %s
define amdgpu_kernel void @global_wavefront_unordered_load(
; GFX6-LABEL: global_wavefront_unordered_load:
@@ -190,6 +191,17 @@ define amdgpu_kernel void @global_wavefront_unordered_load(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_unordered_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in syncscope("wavefront") unordered, align 4
@@ -374,6 +386,17 @@ define amdgpu_kernel void @global_wavefront_monotonic_load(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_monotonic_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in syncscope("wavefront") monotonic, align 4
@@ -558,6 +581,17 @@ define amdgpu_kernel void @global_wavefront_acquire_load(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_acquire_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in syncscope("wavefront") acquire, align 4
@@ -742,6 +776,17 @@ define amdgpu_kernel void @global_wavefront_seq_cst_load(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_seq_cst_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in syncscope("wavefront") seq_cst, align 4
@@ -899,6 +944,16 @@ define amdgpu_kernel void @global_wavefront_unordered_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_unordered_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out syncscope("wavefront") unordered, align 4
@@ -1055,6 +1110,16 @@ define amdgpu_kernel void @global_wavefront_monotonic_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_monotonic_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out syncscope("wavefront") monotonic, align 4
@@ -1211,6 +1276,16 @@ define amdgpu_kernel void @global_wavefront_release_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_release_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out syncscope("wavefront") release, align 4
@@ -1367,6 +1442,16 @@ define amdgpu_kernel void @global_wavefront_seq_cst_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_seq_cst_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out syncscope("wavefront") seq_cst, align 4
@@ -1521,6 +1606,16 @@ define amdgpu_kernel void @global_wavefront_monotonic_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_monotonic_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("wavefront") monotonic
@@ -1675,6 +1770,16 @@ define amdgpu_kernel void @global_wavefront_acquire_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_acquire_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("wavefront") acquire
@@ -1829,6 +1934,16 @@ define amdgpu_kernel void @global_wavefront_release_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_release_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("wavefront") release
@@ -1983,6 +2098,16 @@ define amdgpu_kernel void @global_wavefront_acq_rel_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_acq_rel_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("wavefront") acq_rel
@@ -2137,6 +2262,16 @@ define amdgpu_kernel void @global_wavefront_seq_cst_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_seq_cst_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("wavefront") seq_cst
@@ -2319,6 +2454,18 @@ define amdgpu_kernel void @global_wavefront_acquire_ret_atomicrmw(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_acquire_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("wavefront") acquire
@@ -2502,6 +2649,18 @@ define amdgpu_kernel void @global_wavefront_acq_rel_ret_atomicrmw(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_acq_rel_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("wavefront") acq_rel
@@ -2685,6 +2844,18 @@ define amdgpu_kernel void @global_wavefront_seq_cst_ret_atomicrmw(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_seq_cst_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("wavefront") seq_cst
@@ -2904,6 +3075,20 @@ define amdgpu_kernel void @global_wavefront_monotonic_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_monotonic_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -3123,6 +3308,20 @@ define amdgpu_kernel void @global_wavefront_acquire_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_acquire_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -3342,6 +3541,20 @@ define amdgpu_kernel void @global_wavefront_release_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_release_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -3561,6 +3774,20 @@ define amdgpu_kernel void @global_wavefront_acq_rel_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_acq_rel_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -3780,6 +4007,20 @@ define amdgpu_kernel void @global_wavefront_seq_cst_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_seq_cst_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -3999,6 +4240,20 @@ define amdgpu_kernel void @global_wavefront_monotonic_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_monotonic_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -4218,6 +4473,20 @@ define amdgpu_kernel void @global_wavefront_acquire_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_acquire_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -4437,6 +4706,20 @@ define amdgpu_kernel void @global_wavefront_release_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_release_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -4656,6 +4939,20 @@ define amdgpu_kernel void @global_wavefront_acq_rel_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_acq_rel_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -4875,6 +5172,20 @@ define amdgpu_kernel void @global_wavefront_seq_cst_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_seq_cst_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -5094,6 +5405,20 @@ define amdgpu_kernel void @global_wavefront_monotonic_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_monotonic_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -5313,6 +5638,20 @@ define amdgpu_kernel void @global_wavefront_acquire_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_acquire_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -5532,6 +5871,20 @@ define amdgpu_kernel void @global_wavefront_release_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_release_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -5751,6 +6104,20 @@ define amdgpu_kernel void @global_wavefront_acq_rel_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_acq_rel_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -5970,6 +6337,20 @@ define amdgpu_kernel void @global_wavefront_seq_cst_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_seq_cst_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -6219,6 +6600,22 @@ define amdgpu_kernel void @global_wavefront_monotonic_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_monotonic_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -6470,6 +6867,22 @@ define amdgpu_kernel void @global_wavefront_acquire_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_acquire_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -6721,6 +7134,22 @@ define amdgpu_kernel void @global_wavefront_release_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_release_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -6972,6 +7401,22 @@ define amdgpu_kernel void @global_wavefront_acq_rel_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_acq_rel_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -7223,6 +7668,22 @@ define amdgpu_kernel void @global_wavefront_seq_cst_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_seq_cst_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -7474,6 +7935,22 @@ define amdgpu_kernel void @global_wavefront_monotonic_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_monotonic_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -7725,6 +8202,22 @@ define amdgpu_kernel void @global_wavefront_acquire_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_acquire_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -7976,6 +8469,22 @@ define amdgpu_kernel void @global_wavefront_release_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_release_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -8227,6 +8736,22 @@ define amdgpu_kernel void @global_wavefront_acq_rel_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_acq_rel_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -8478,6 +9003,22 @@ define amdgpu_kernel void @global_wavefront_seq_cst_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_seq_cst_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -8729,6 +9270,22 @@ define amdgpu_kernel void @global_wavefront_monotonic_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_monotonic_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -8980,6 +9537,22 @@ define amdgpu_kernel void @global_wavefront_acquire_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_acquire_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -9231,6 +9804,22 @@ define amdgpu_kernel void @global_wavefront_release_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_release_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -9482,6 +10071,22 @@ define amdgpu_kernel void @global_wavefront_acq_rel_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_acq_rel_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -9733,6 +10338,22 @@ define amdgpu_kernel void @global_wavefront_seq_cst_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_seq_cst_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -9919,6 +10540,17 @@ define amdgpu_kernel void @global_wavefront_one_as_unordered_load(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_one_as_unordered_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in syncscope("wavefront-one-as") unordered, align 4
@@ -10103,6 +10735,17 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_load(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_one_as_monotonic_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in syncscope("wavefront-one-as") monotonic, align 4
@@ -10287,6 +10930,17 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_load(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_one_as_acquire_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in syncscope("wavefront-one-as") acquire, align 4
@@ -10471,6 +11125,17 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_load(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_one_as_seq_cst_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in syncscope("wavefront-one-as") seq_cst, align 4
@@ -10628,6 +11293,16 @@ define amdgpu_kernel void @global_wavefront_one_as_unordered_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_one_as_unordered_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out syncscope("wavefront-one-as") unordered, align 4
@@ -10784,6 +11459,16 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_one_as_monotonic_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out syncscope("wavefront-one-as") monotonic, align 4
@@ -10940,6 +11625,16 @@ define amdgpu_kernel void @global_wavefront_one_as_release_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_one_as_release_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out syncscope("wavefront-one-as") release, align 4
@@ -11096,6 +11791,16 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_one_as_seq_cst_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out syncscope("wavefront-one-as") seq_cst, align 4
@@ -11250,6 +11955,16 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_one_as_monotonic_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("wavefront-one-as") monotonic
@@ -11404,6 +12119,16 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_one_as_acquire_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("wavefront-one-as") acquire
@@ -11558,6 +12283,16 @@ define amdgpu_kernel void @global_wavefront_one_as_release_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_one_as_release_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("wavefront-one-as") release
@@ -11712,6 +12447,16 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_one_as_acq_rel_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("wavefront-one-as") acq_rel
@@ -11866,6 +12611,16 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_one_as_seq_cst_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("wavefront-one-as") seq_cst
@@ -12048,6 +12803,18 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_ret_atomicrmw(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_one_as_acquire_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("wavefront-one-as") acquire
@@ -12231,6 +12998,18 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_ret_atomicrmw(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_one_as_acq_rel_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("wavefront-one-as") acq_rel
@@ -12414,6 +13193,18 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_ret_atomicrmw(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_one_as_seq_cst_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("wavefront-one-as") seq_cst
@@ -12633,6 +13424,20 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_one_as_monotonic_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -12852,6 +13657,20 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_one_as_acquire_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -13071,6 +13890,20 @@ define amdgpu_kernel void @global_wavefront_one_as_release_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_one_as_release_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -13290,6 +14123,20 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_one_as_acq_rel_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -13509,6 +14356,20 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_one_as_seq_cst_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -13728,6 +14589,20 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_one_as_monotonic_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -13947,6 +14822,20 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_one_as_acquire_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -14166,6 +15055,20 @@ define amdgpu_kernel void @global_wavefront_one_as_release_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_one_as_release_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -14385,6 +15288,20 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_one_as_acq_rel_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -14604,6 +15521,20 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_one_as_seq_cst_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -14823,6 +15754,20 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_one_as_monotonic_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -15042,6 +15987,20 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_one_as_acquire_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -15261,6 +16220,20 @@ define amdgpu_kernel void @global_wavefront_one_as_release_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_one_as_release_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -15480,6 +16453,20 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_one_as_acq_rel_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -15699,6 +16686,20 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_one_as_seq_cst_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -15948,6 +16949,22 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_monotonic_ret_cmpxc
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_one_as_monotonic_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -16199,6 +17216,22 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_monotonic_ret_cmpxchg
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_one_as_acquire_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -16450,6 +17483,22 @@ define amdgpu_kernel void @global_wavefront_one_as_release_monotonic_ret_cmpxchg
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_one_as_release_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -16701,6 +17750,22 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -16952,6 +18017,22 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -17203,6 +18284,22 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_acquire_ret_cmpxchg
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_one_as_monotonic_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -17454,6 +18551,22 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_one_as_acquire_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -17705,6 +18818,22 @@ define amdgpu_kernel void @global_wavefront_one_as_release_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_one_as_release_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -17956,6 +19085,22 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_one_as_acq_rel_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -18207,6 +19352,22 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_one_as_seq_cst_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -18458,6 +19619,22 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -18709,6 +19886,22 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_one_as_acquire_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -18960,6 +20153,22 @@ define amdgpu_kernel void @global_wavefront_one_as_release_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_one_as_release_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -19211,6 +20420,22 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_one_as_acq_rel_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -19462,6 +20687,22 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-workgroup.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-workgroup.ll
index 69b0c7f93ab0e..c1879c8eb11af 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-workgroup.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-workgroup.ll
@@ -12,6 +12,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250-CU %s
define amdgpu_kernel void @global_workgroup_unordered_load(
; GFX6-LABEL: global_workgroup_unordered_load:
@@ -190,6 +191,17 @@ define amdgpu_kernel void @global_workgroup_unordered_load(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_unordered_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in syncscope("workgroup") unordered, align 4
@@ -374,6 +386,17 @@ define amdgpu_kernel void @global_workgroup_monotonic_load(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_monotonic_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in syncscope("workgroup") monotonic, align 4
@@ -563,6 +586,17 @@ define amdgpu_kernel void @global_workgroup_acquire_load(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_acquire_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in syncscope("workgroup") acquire, align 4
@@ -764,6 +798,18 @@ define amdgpu_kernel void @global_workgroup_seq_cst_load(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_seq_cst_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in syncscope("workgroup") seq_cst, align 4
@@ -921,6 +967,16 @@ define amdgpu_kernel void @global_workgroup_unordered_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_unordered_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out syncscope("workgroup") unordered, align 4
@@ -1077,6 +1133,16 @@ define amdgpu_kernel void @global_workgroup_monotonic_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_monotonic_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out syncscope("workgroup") monotonic, align 4
@@ -1251,6 +1317,17 @@ define amdgpu_kernel void @global_workgroup_release_store(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_release_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out syncscope("workgroup") release, align 4
@@ -1425,6 +1502,17 @@ define amdgpu_kernel void @global_workgroup_seq_cst_store(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_seq_cst_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out syncscope("workgroup") seq_cst, align 4
@@ -1579,6 +1667,16 @@ define amdgpu_kernel void @global_workgroup_monotonic_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_monotonic_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("workgroup") monotonic
@@ -1743,6 +1841,16 @@ define amdgpu_kernel void @global_workgroup_acquire_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_acquire_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("workgroup") acquire
@@ -1915,6 +2023,17 @@ define amdgpu_kernel void @global_workgroup_release_atomicrmw(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_release_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("workgroup") release
@@ -2097,6 +2216,17 @@ define amdgpu_kernel void @global_workgroup_acq_rel_atomicrmw(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_acq_rel_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("workgroup") acq_rel
@@ -2279,6 +2409,17 @@ define amdgpu_kernel void @global_workgroup_seq_cst_atomicrmw(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_seq_cst_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("workgroup") seq_cst
@@ -2466,6 +2607,18 @@ define amdgpu_kernel void @global_workgroup_acquire_ret_atomicrmw(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_acquire_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("workgroup") acquire
@@ -2674,6 +2827,19 @@ define amdgpu_kernel void @global_workgroup_acq_rel_ret_atomicrmw(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_acq_rel_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("workgroup") acq_rel
@@ -2882,6 +3048,19 @@ define amdgpu_kernel void @global_workgroup_seq_cst_ret_atomicrmw(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_seq_cst_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("workgroup") seq_cst
@@ -3101,6 +3280,20 @@ define amdgpu_kernel void @global_workgroup_monotonic_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_monotonic_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -3330,6 +3523,20 @@ define amdgpu_kernel void @global_workgroup_acquire_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_acquire_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -3567,6 +3774,21 @@ define amdgpu_kernel void @global_workgroup_release_monotonic_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_release_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -3814,6 +4036,21 @@ define amdgpu_kernel void @global_workgroup_acq_rel_monotonic_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_acq_rel_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -4061,6 +4298,21 @@ define amdgpu_kernel void @global_workgroup_seq_cst_monotonic_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_seq_cst_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -4290,6 +4542,20 @@ define amdgpu_kernel void @global_workgroup_monotonic_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_monotonic_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -4519,6 +4785,20 @@ define amdgpu_kernel void @global_workgroup_acquire_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_acquire_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -4766,6 +5046,21 @@ define amdgpu_kernel void @global_workgroup_release_acquire_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_release_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -5013,6 +5308,21 @@ define amdgpu_kernel void @global_workgroup_acq_rel_acquire_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_acq_rel_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -5260,6 +5570,21 @@ define amdgpu_kernel void @global_workgroup_seq_cst_acquire_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_seq_cst_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -5507,6 +5832,21 @@ define amdgpu_kernel void @global_workgroup_monotonic_seq_cst_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_monotonic_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -5754,6 +6094,21 @@ define amdgpu_kernel void @global_workgroup_acquire_seq_cst_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_acquire_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -6001,6 +6356,21 @@ define amdgpu_kernel void @global_workgroup_release_seq_cst_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_release_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -6248,6 +6618,21 @@ define amdgpu_kernel void @global_workgroup_acq_rel_seq_cst_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_acq_rel_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -6495,6 +6880,21 @@ define amdgpu_kernel void @global_workgroup_seq_cst_seq_cst_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_seq_cst_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -6744,6 +7144,22 @@ define amdgpu_kernel void @global_workgroup_monotonic_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_monotonic_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -7000,6 +7416,22 @@ define amdgpu_kernel void @global_workgroup_acquire_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_acquire_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -7269,6 +7701,23 @@ define amdgpu_kernel void @global_workgroup_release_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_release_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -7545,6 +7994,23 @@ define amdgpu_kernel void @global_workgroup_acq_rel_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_acq_rel_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -7821,6 +8287,23 @@ define amdgpu_kernel void @global_workgroup_seq_cst_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_seq_cst_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -8079,6 +8562,22 @@ define amdgpu_kernel void @global_workgroup_monotonic_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_monotonic_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -8335,6 +8834,22 @@ define amdgpu_kernel void @global_workgroup_acquire_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_acquire_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -8611,6 +9126,23 @@ define amdgpu_kernel void @global_workgroup_release_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_release_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -8887,6 +9419,23 @@ define amdgpu_kernel void @global_workgroup_acq_rel_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_acq_rel_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -9163,6 +9712,23 @@ define amdgpu_kernel void @global_workgroup_seq_cst_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_seq_cst_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -9439,6 +10005,23 @@ define amdgpu_kernel void @global_workgroup_monotonic_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_monotonic_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -9713,6 +10296,23 @@ define amdgpu_kernel void @global_workgroup_acquire_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_acquire_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -9989,6 +10589,23 @@ define amdgpu_kernel void @global_workgroup_release_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_release_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -10265,6 +10882,23 @@ define amdgpu_kernel void @global_workgroup_acq_rel_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_acq_rel_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -10541,6 +11175,23 @@ define amdgpu_kernel void @global_workgroup_seq_cst_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_seq_cst_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -10727,6 +11378,17 @@ define amdgpu_kernel void @global_workgroup_one_as_unordered_load(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_one_as_unordered_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in syncscope("workgroup-one-as") unordered, align 4
@@ -10911,6 +11573,17 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_load(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_one_as_monotonic_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in syncscope("workgroup-one-as") monotonic, align 4
@@ -11100,6 +11773,17 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_load(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_one_as_acquire_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in syncscope("workgroup-one-as") acquire, align 4
@@ -11297,6 +11981,17 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_load(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_one_as_seq_cst_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in syncscope("workgroup-one-as") seq_cst, align 4
@@ -11454,6 +12149,16 @@ define amdgpu_kernel void @global_workgroup_one_as_unordered_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_one_as_unordered_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out syncscope("workgroup-one-as") unordered, align 4
@@ -11610,6 +12315,16 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_one_as_monotonic_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out syncscope("workgroup-one-as") monotonic, align 4
@@ -11776,6 +12491,16 @@ define amdgpu_kernel void @global_workgroup_one_as_release_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_one_as_release_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out syncscope("workgroup-one-as") release, align 4
@@ -11942,6 +12667,16 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_one_as_seq_cst_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out syncscope("workgroup-one-as") seq_cst, align 4
@@ -12096,6 +12831,16 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_one_as_monotonic_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("workgroup-one-as") monotonic
@@ -12260,6 +13005,16 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_one_as_acquire_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("workgroup-one-as") acquire
@@ -12424,6 +13179,16 @@ define amdgpu_kernel void @global_workgroup_one_as_release_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_one_as_release_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("workgroup-one-as") release
@@ -12598,6 +13363,16 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_one_as_acq_rel_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("workgroup-one-as") acq_rel
@@ -12772,6 +13547,16 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_one_as_seq_cst_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("workgroup-one-as") seq_cst
@@ -12959,6 +13744,18 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_ret_atomicrmw(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_one_as_acquire_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("workgroup-one-as") acquire
@@ -13159,6 +13956,18 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_ret_atomicrmw(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_one_as_acq_rel_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("workgroup-one-as") acq_rel
@@ -13359,6 +14168,18 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_ret_atomicrmw(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_one_as_seq_cst_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("workgroup-one-as") seq_cst
@@ -13578,6 +14399,20 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_one_as_monotonic_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -13807,6 +14642,20 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_one_as_acquire_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -14036,6 +14885,20 @@ define amdgpu_kernel void @global_workgroup_one_as_release_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_one_as_release_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -14275,6 +15138,20 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_one_as_acq_rel_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -14514,6 +15391,20 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_one_as_seq_cst_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -14743,6 +15634,20 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_one_as_monotonic_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -14972,6 +15877,20 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_one_as_acquire_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -15211,6 +16130,20 @@ define amdgpu_kernel void @global_workgroup_one_as_release_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_one_as_release_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -15450,6 +16383,20 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_one_as_acq_rel_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -15689,6 +16636,20 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_one_as_seq_cst_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -15928,6 +16889,20 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_one_as_monotonic_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -16167,6 +17142,20 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_one_as_acquire_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -16406,6 +17395,20 @@ define amdgpu_kernel void @global_workgroup_one_as_release_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_one_as_release_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -16645,6 +17648,20 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_one_as_acq_rel_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -16884,6 +17901,20 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_one_as_seq_cst_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -17133,6 +18164,22 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_monotonic_ret_cmpxc
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_one_as_monotonic_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -17389,6 +18436,22 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_monotonic_ret_cmpxchg
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_one_as_acquire_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -17650,6 +18713,22 @@ define amdgpu_kernel void @global_workgroup_one_as_release_monotonic_ret_cmpxchg
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_one_as_release_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -17918,6 +18997,22 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -18186,6 +19281,22 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -18444,6 +19555,22 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_acquire_ret_cmpxchg
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_one_as_monotonic_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -18700,6 +19827,22 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_one_as_acquire_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -18968,6 +20111,22 @@ define amdgpu_kernel void @global_workgroup_one_as_release_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_one_as_release_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -19236,6 +20395,22 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_one_as_acq_rel_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -19504,6 +20679,22 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_one_as_seq_cst_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -19772,6 +20963,22 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_seq_cst_ret_cmpxchg
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_one_as_monotonic_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -20038,6 +21245,22 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_one_as_acquire_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -20306,6 +21529,22 @@ define amdgpu_kernel void @global_workgroup_one_as_release_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_one_as_release_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -20574,6 +21813,22 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_one_as_acq_rel_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -20842,6 +22097,22 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: global_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-agent.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-agent.ll
index 0467c5047a0be..fe703f5e8c90f 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-agent.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-agent.ll
@@ -12,6 +12,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250-CU %s
define amdgpu_kernel void @local_agent_unordered_load(
; GFX6-LABEL: local_agent_unordered_load:
@@ -177,6 +178,18 @@ define amdgpu_kernel void @local_agent_unordered_load(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_unordered_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: ds_load_b32 v1, v0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
%val = load atomic i32, ptr addrspace(3) %in syncscope("agent") unordered, align 4
@@ -348,6 +361,18 @@ define amdgpu_kernel void @local_agent_monotonic_load(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_monotonic_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: ds_load_b32 v1, v0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
%val = load atomic i32, ptr addrspace(3) %in syncscope("agent") monotonic, align 4
@@ -524,6 +549,18 @@ define amdgpu_kernel void @local_agent_acquire_load(
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_acquire_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: ds_load_b32 v1, v0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
%val = load atomic i32, ptr addrspace(3) %in syncscope("agent") acquire, align 4
@@ -718,6 +755,19 @@ define amdgpu_kernel void @local_agent_seq_cst_load(
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_seq_cst_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_load_b32 v1, v0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
%val = load atomic i32, ptr addrspace(3) %in syncscope("agent") seq_cst, align 4
@@ -859,6 +909,16 @@ define amdgpu_kernel void @local_agent_unordered_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_unordered_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic i32 %in, ptr addrspace(3) %out syncscope("agent") unordered, align 4
@@ -999,6 +1059,16 @@ define amdgpu_kernel void @local_agent_monotonic_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_monotonic_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic i32 %in, ptr addrspace(3) %out syncscope("agent") monotonic, align 4
@@ -1157,6 +1227,17 @@ define amdgpu_kernel void @local_agent_release_store(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_release_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic i32 %in, ptr addrspace(3) %out syncscope("agent") release, align 4
@@ -1315,6 +1396,17 @@ define amdgpu_kernel void @local_agent_seq_cst_store(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_seq_cst_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic i32 %in, ptr addrspace(3) %out syncscope("agent") seq_cst, align 4
@@ -1455,6 +1547,16 @@ define amdgpu_kernel void @local_agent_monotonic_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_monotonic_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("agent") monotonic
@@ -1611,6 +1713,17 @@ define amdgpu_kernel void @local_agent_acquire_atomicrmw(
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_acquire_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("agent") acquire
@@ -1769,6 +1882,17 @@ define amdgpu_kernel void @local_agent_release_atomicrmw(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_release_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("agent") release
@@ -1943,6 +2067,18 @@ define amdgpu_kernel void @local_agent_acq_rel_atomicrmw(
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_acq_rel_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("agent") acq_rel
@@ -2117,6 +2253,18 @@ define amdgpu_kernel void @local_agent_seq_cst_atomicrmw(
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_seq_cst_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("agent") seq_cst
@@ -2304,6 +2452,19 @@ define amdgpu_kernel void @local_agent_acquire_ret_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_acquire_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("agent") acquire
@@ -2510,6 +2671,20 @@ define amdgpu_kernel void @local_agent_acq_rel_ret_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_acq_rel_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("agent") acq_rel
@@ -2716,6 +2891,20 @@ define amdgpu_kernel void @local_agent_seq_cst_ret_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_seq_cst_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("agent") seq_cst
@@ -2883,6 +3072,18 @@ define amdgpu_kernel void @local_agent_monotonic_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_monotonic_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -3066,6 +3267,19 @@ define amdgpu_kernel void @local_agent_acquire_monotonic_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_acquire_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -3251,6 +3465,19 @@ define amdgpu_kernel void @local_agent_release_monotonic_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_release_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -3452,6 +3679,20 @@ define amdgpu_kernel void @local_agent_acq_rel_monotonic_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_acq_rel_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -3653,6 +3894,20 @@ define amdgpu_kernel void @local_agent_seq_cst_monotonic_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_seq_cst_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -3836,6 +4091,19 @@ define amdgpu_kernel void @local_agent_monotonic_acquire_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_monotonic_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -4019,6 +4287,19 @@ define amdgpu_kernel void @local_agent_acquire_acquire_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_acquire_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -4220,6 +4501,20 @@ define amdgpu_kernel void @local_agent_release_acquire_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_release_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -4421,6 +4716,20 @@ define amdgpu_kernel void @local_agent_acq_rel_acquire_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_acq_rel_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -4622,6 +4931,20 @@ define amdgpu_kernel void @local_agent_seq_cst_acquire_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_seq_cst_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -4823,6 +5146,20 @@ define amdgpu_kernel void @local_agent_monotonic_seq_cst_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_monotonic_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -5024,6 +5361,20 @@ define amdgpu_kernel void @local_agent_acquire_seq_cst_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_acquire_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -5225,6 +5576,20 @@ define amdgpu_kernel void @local_agent_release_seq_cst_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_release_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -5426,6 +5791,20 @@ define amdgpu_kernel void @local_agent_acq_rel_seq_cst_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_acq_rel_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -5627,6 +6006,20 @@ define amdgpu_kernel void @local_agent_seq_cst_seq_cst_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_seq_cst_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -5836,6 +6229,21 @@ define amdgpu_kernel void @local_agent_monotonic_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_monotonic_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -6052,6 +6460,21 @@ define amdgpu_kernel void @local_agent_acquire_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_acquire_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -6281,6 +6704,22 @@ define amdgpu_kernel void @local_agent_release_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_release_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -6515,6 +6954,22 @@ define amdgpu_kernel void @local_agent_acq_rel_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_acq_rel_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -6749,6 +7204,22 @@ define amdgpu_kernel void @local_agent_seq_cst_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_seq_cst_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -6965,6 +7436,21 @@ define amdgpu_kernel void @local_agent_monotonic_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_monotonic_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -7181,6 +7667,21 @@ define amdgpu_kernel void @local_agent_acquire_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_acquire_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -7415,6 +7916,22 @@ define amdgpu_kernel void @local_agent_release_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_release_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -7649,6 +8166,22 @@ define amdgpu_kernel void @local_agent_acq_rel_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_acq_rel_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -7883,6 +8416,22 @@ define amdgpu_kernel void @local_agent_seq_cst_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_seq_cst_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -8117,6 +8666,22 @@ define amdgpu_kernel void @local_agent_monotonic_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_monotonic_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -8351,6 +8916,22 @@ define amdgpu_kernel void @local_agent_acquire_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_acquire_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -8585,6 +9166,22 @@ define amdgpu_kernel void @local_agent_release_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_release_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -8819,6 +9416,22 @@ define amdgpu_kernel void @local_agent_acq_rel_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_acq_rel_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -9053,6 +9666,22 @@ define amdgpu_kernel void @local_agent_seq_cst_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_seq_cst_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -9226,6 +9855,18 @@ define amdgpu_kernel void @local_agent_one_as_unordered_load(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_one_as_unordered_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: ds_load_b32 v1, v0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
%val = load atomic i32, ptr addrspace(3) %in syncscope("agent-one-as") unordered, align 4
@@ -9397,6 +10038,18 @@ define amdgpu_kernel void @local_agent_one_as_monotonic_load(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_one_as_monotonic_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: ds_load_b32 v1, v0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
%val = load atomic i32, ptr addrspace(3) %in syncscope("agent-one-as") monotonic, align 4
@@ -9568,6 +10221,18 @@ define amdgpu_kernel void @local_agent_one_as_acquire_load(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_one_as_acquire_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: ds_load_b32 v1, v0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
%val = load atomic i32, ptr addrspace(3) %in syncscope("agent-one-as") acquire, align 4
@@ -9739,6 +10404,18 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_load(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_one_as_seq_cst_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: ds_load_b32 v1, v0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
%val = load atomic i32, ptr addrspace(3) %in syncscope("agent-one-as") seq_cst, align 4
@@ -9880,6 +10557,16 @@ define amdgpu_kernel void @local_agent_one_as_unordered_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_one_as_unordered_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic i32 %in, ptr addrspace(3) %out syncscope("agent-one-as") unordered, align 4
@@ -10020,6 +10707,16 @@ define amdgpu_kernel void @local_agent_one_as_monotonic_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_one_as_monotonic_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic i32 %in, ptr addrspace(3) %out syncscope("agent-one-as") monotonic, align 4
@@ -10160,6 +10857,16 @@ define amdgpu_kernel void @local_agent_one_as_release_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_one_as_release_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic i32 %in, ptr addrspace(3) %out syncscope("agent-one-as") release, align 4
@@ -10300,6 +11007,16 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_one_as_seq_cst_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic i32 %in, ptr addrspace(3) %out syncscope("agent-one-as") seq_cst, align 4
@@ -10440,6 +11157,16 @@ define amdgpu_kernel void @local_agent_one_as_monotonic_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_one_as_monotonic_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("agent-one-as") monotonic
@@ -10580,6 +11307,16 @@ define amdgpu_kernel void @local_agent_one_as_acquire_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_one_as_acquire_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("agent-one-as") acquire
@@ -10720,6 +11457,16 @@ define amdgpu_kernel void @local_agent_one_as_release_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_one_as_release_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("agent-one-as") release
@@ -10860,6 +11607,16 @@ define amdgpu_kernel void @local_agent_one_as_acq_rel_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_one_as_acq_rel_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("agent-one-as") acq_rel
@@ -11000,6 +11757,16 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_one_as_seq_cst_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("agent-one-as") seq_cst
@@ -11182,6 +11949,19 @@ define amdgpu_kernel void @local_agent_one_as_acquire_ret_atomicrmw(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_one_as_acquire_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("agent-one-as") acquire
@@ -11365,6 +12145,19 @@ define amdgpu_kernel void @local_agent_one_as_acq_rel_ret_atomicrmw(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_one_as_acq_rel_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("agent-one-as") acq_rel
@@ -11548,6 +12341,19 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_ret_atomicrmw(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_one_as_seq_cst_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("agent-one-as") seq_cst
@@ -11715,6 +12521,18 @@ define amdgpu_kernel void @local_agent_one_as_monotonic_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_one_as_monotonic_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -11882,6 +12700,18 @@ define amdgpu_kernel void @local_agent_one_as_acquire_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_one_as_acquire_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -12049,6 +12879,18 @@ define amdgpu_kernel void @local_agent_one_as_release_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_one_as_release_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -12216,6 +13058,18 @@ define amdgpu_kernel void @local_agent_one_as_acq_rel_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_one_as_acq_rel_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -12383,6 +13237,18 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_one_as_seq_cst_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -12550,6 +13416,18 @@ define amdgpu_kernel void @local_agent_one_as_monotonic_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_one_as_monotonic_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -12717,6 +13595,18 @@ define amdgpu_kernel void @local_agent_one_as_acquire_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_one_as_acquire_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -12884,6 +13774,18 @@ define amdgpu_kernel void @local_agent_one_as_release_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_one_as_release_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -13051,6 +13953,18 @@ define amdgpu_kernel void @local_agent_one_as_acq_rel_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_one_as_acq_rel_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -13218,6 +14132,18 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_one_as_seq_cst_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -13385,6 +14311,18 @@ define amdgpu_kernel void @local_agent_one_as_monotonic_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_one_as_monotonic_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -13552,6 +14490,18 @@ define amdgpu_kernel void @local_agent_one_as_acquire_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_one_as_acquire_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -13719,6 +14669,18 @@ define amdgpu_kernel void @local_agent_one_as_release_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_one_as_release_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -13886,6 +14848,18 @@ define amdgpu_kernel void @local_agent_one_as_acq_rel_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_one_as_acq_rel_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -14053,6 +15027,18 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_one_as_seq_cst_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -14262,6 +15248,21 @@ define amdgpu_kernel void @local_agent_one_as_monotonic_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_one_as_monotonic_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -14473,6 +15474,21 @@ define amdgpu_kernel void @local_agent_one_as_acquire_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_one_as_acquire_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -14684,6 +15700,21 @@ define amdgpu_kernel void @local_agent_one_as_release_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_one_as_release_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -14895,6 +15926,21 @@ define amdgpu_kernel void @local_agent_one_as_acq_rel_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_one_as_acq_rel_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -15106,6 +16152,21 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_one_as_seq_cst_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -15317,6 +16378,21 @@ define amdgpu_kernel void @local_agent_one_as_monotonic_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_one_as_monotonic_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -15528,6 +16604,21 @@ define amdgpu_kernel void @local_agent_one_as_acquire_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_one_as_acquire_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -15739,6 +16830,21 @@ define amdgpu_kernel void @local_agent_one_as_release_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_one_as_release_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -15950,6 +17056,21 @@ define amdgpu_kernel void @local_agent_one_as_acq_rel_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_one_as_acq_rel_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -16161,6 +17282,21 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_one_as_seq_cst_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -16372,6 +17508,21 @@ define amdgpu_kernel void @local_agent_one_as_monotonic_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_one_as_monotonic_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -16583,6 +17734,21 @@ define amdgpu_kernel void @local_agent_one_as_acquire_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_one_as_acquire_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -16794,6 +17960,21 @@ define amdgpu_kernel void @local_agent_one_as_release_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_one_as_release_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -17005,6 +18186,21 @@ define amdgpu_kernel void @local_agent_one_as_acq_rel_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_one_as_acq_rel_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -17216,6 +18412,21 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_agent_one_as_seq_cst_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-nontemporal.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-nontemporal.ll
index 78209ee34cad4..689932469d78d 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-nontemporal.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-nontemporal.ll
@@ -12,6 +12,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250-CU %s
define amdgpu_kernel void @local_nontemporal_load_0(
; GFX6-LABEL: local_nontemporal_load_0:
@@ -193,6 +194,18 @@ define amdgpu_kernel void @local_nontemporal_load_0(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_nontemporal_load_0:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ds_load_b32 v1, v1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(1) %out) {
entry:
%val = load i32, ptr addrspace(3) %in, align 4, !nontemporal !0
@@ -428,6 +441,22 @@ define amdgpu_kernel void @local_nontemporal_load_1(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_nontemporal_load_1:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, v0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_mov_b32 s2, 0x3ff
+; GFX1250-CU-NEXT: v_and_b32_e64 v1, v1, s2
+; GFX1250-CU-NEXT: s_mov_b32 s2, 2
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_lshl_add_u32 v1, v1, s2, s3
+; GFX1250-CU-NEXT: ds_load_b32 v1, v1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(1) %out) {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -597,6 +626,18 @@ define amdgpu_kernel void @local_nontemporal_store_0(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_nontemporal_store_0:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[2:3], 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(3) %out) {
entry:
%val = load i32, ptr addrspace(1) %in, align 4
@@ -802,6 +843,22 @@ define amdgpu_kernel void @local_nontemporal_store_1(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_nontemporal_store_1:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[0:1], 0x0
+; GFX1250-CU-NEXT: s_wait_xcnt 0x0
+; GFX1250-CU-NEXT: s_mov_b32 s1, 0x3ff
+; GFX1250-CU-NEXT: v_and_b32_e64 v0, v0, s1
+; GFX1250-CU-NEXT: s_mov_b32 s1, 2
+; GFX1250-CU-NEXT: v_lshl_add_u32 v0, v0, s1, s2
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(3) %out) {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -991,6 +1048,18 @@ define amdgpu_kernel void @local_nontemporal_volatile_load(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_nontemporal_volatile_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ds_load_b32 v1, v1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(1) %out) {
entry:
%val = load volatile i32, ptr addrspace(3) %in, align 4, !nontemporal !0
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-singlethread.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-singlethread.ll
index f84d451f8ecb0..97c80ece2b053 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-singlethread.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-singlethread.ll
@@ -12,6 +12,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250-CU %s
define amdgpu_kernel void @local_singlethread_unordered_load(
; GFX6-LABEL: local_singlethread_unordered_load:
@@ -177,6 +178,18 @@ define amdgpu_kernel void @local_singlethread_unordered_load(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_unordered_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: ds_load_b32 v1, v0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
%val = load atomic i32, ptr addrspace(3) %in syncscope("singlethread") unordered, align 4
@@ -348,6 +361,18 @@ define amdgpu_kernel void @local_singlethread_monotonic_load(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_monotonic_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: ds_load_b32 v1, v0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
%val = load atomic i32, ptr addrspace(3) %in syncscope("singlethread") monotonic, align 4
@@ -519,6 +544,18 @@ define amdgpu_kernel void @local_singlethread_acquire_load(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_acquire_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: ds_load_b32 v1, v0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
%val = load atomic i32, ptr addrspace(3) %in syncscope("singlethread") acquire, align 4
@@ -690,6 +727,18 @@ define amdgpu_kernel void @local_singlethread_seq_cst_load(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_seq_cst_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: ds_load_b32 v1, v0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
%val = load atomic i32, ptr addrspace(3) %in syncscope("singlethread") seq_cst, align 4
@@ -831,6 +880,16 @@ define amdgpu_kernel void @local_singlethread_unordered_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_unordered_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic i32 %in, ptr addrspace(3) %out syncscope("singlethread") unordered, align 4
@@ -971,6 +1030,16 @@ define amdgpu_kernel void @local_singlethread_monotonic_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_monotonic_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic i32 %in, ptr addrspace(3) %out syncscope("singlethread") monotonic, align 4
@@ -1111,6 +1180,16 @@ define amdgpu_kernel void @local_singlethread_release_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_release_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic i32 %in, ptr addrspace(3) %out syncscope("singlethread") release, align 4
@@ -1251,6 +1330,16 @@ define amdgpu_kernel void @local_singlethread_seq_cst_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_seq_cst_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic i32 %in, ptr addrspace(3) %out syncscope("singlethread") seq_cst, align 4
@@ -1391,6 +1480,16 @@ define amdgpu_kernel void @local_singlethread_monotonic_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_monotonic_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("singlethread") monotonic
@@ -1531,6 +1630,16 @@ define amdgpu_kernel void @local_singlethread_acquire_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_acquire_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("singlethread") acquire
@@ -1671,6 +1780,16 @@ define amdgpu_kernel void @local_singlethread_release_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_release_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("singlethread") release
@@ -1811,6 +1930,16 @@ define amdgpu_kernel void @local_singlethread_acq_rel_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_acq_rel_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("singlethread") acq_rel
@@ -1951,6 +2080,16 @@ define amdgpu_kernel void @local_singlethread_seq_cst_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_seq_cst_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("singlethread") seq_cst
@@ -2133,6 +2272,19 @@ define amdgpu_kernel void @local_singlethread_acquire_ret_atomicrmw(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_acquire_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("singlethread") acquire
@@ -2316,6 +2468,19 @@ define amdgpu_kernel void @local_singlethread_acq_rel_ret_atomicrmw(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_acq_rel_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("singlethread") acq_rel
@@ -2499,6 +2664,19 @@ define amdgpu_kernel void @local_singlethread_seq_cst_ret_atomicrmw(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_seq_cst_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("singlethread") seq_cst
@@ -2666,6 +2844,18 @@ define amdgpu_kernel void @local_singlethread_monotonic_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_monotonic_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -2833,6 +3023,18 @@ define amdgpu_kernel void @local_singlethread_acquire_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_acquire_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -3000,6 +3202,18 @@ define amdgpu_kernel void @local_singlethread_release_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_release_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -3167,6 +3381,18 @@ define amdgpu_kernel void @local_singlethread_acq_rel_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_acq_rel_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -3334,6 +3560,18 @@ define amdgpu_kernel void @local_singlethread_seq_cst_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_seq_cst_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -3501,6 +3739,18 @@ define amdgpu_kernel void @local_singlethread_monotonic_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_monotonic_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -3668,6 +3918,18 @@ define amdgpu_kernel void @local_singlethread_acquire_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_acquire_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -3835,6 +4097,18 @@ define amdgpu_kernel void @local_singlethread_release_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_release_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -4002,6 +4276,18 @@ define amdgpu_kernel void @local_singlethread_acq_rel_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_acq_rel_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -4169,6 +4455,18 @@ define amdgpu_kernel void @local_singlethread_seq_cst_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_seq_cst_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -4336,6 +4634,18 @@ define amdgpu_kernel void @local_singlethread_monotonic_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_monotonic_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -4503,6 +4813,18 @@ define amdgpu_kernel void @local_singlethread_acquire_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_acquire_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -4670,6 +4992,18 @@ define amdgpu_kernel void @local_singlethread_release_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_release_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -4837,6 +5171,18 @@ define amdgpu_kernel void @local_singlethread_acq_rel_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_acq_rel_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -5004,6 +5350,18 @@ define amdgpu_kernel void @local_singlethread_seq_cst_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_seq_cst_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -5213,6 +5571,21 @@ define amdgpu_kernel void @local_singlethread_monotonic_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_monotonic_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -5424,6 +5797,21 @@ define amdgpu_kernel void @local_singlethread_acquire_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_acquire_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -5635,6 +6023,21 @@ define amdgpu_kernel void @local_singlethread_release_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_release_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -5846,6 +6249,21 @@ define amdgpu_kernel void @local_singlethread_acq_rel_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_acq_rel_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -6057,6 +6475,21 @@ define amdgpu_kernel void @local_singlethread_seq_cst_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_seq_cst_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -6268,6 +6701,21 @@ define amdgpu_kernel void @local_singlethread_monotonic_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_monotonic_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -6479,6 +6927,21 @@ define amdgpu_kernel void @local_singlethread_acquire_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_acquire_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -6690,6 +7153,21 @@ define amdgpu_kernel void @local_singlethread_release_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_release_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -6901,6 +7379,21 @@ define amdgpu_kernel void @local_singlethread_acq_rel_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_acq_rel_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -7112,6 +7605,21 @@ define amdgpu_kernel void @local_singlethread_seq_cst_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_seq_cst_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -7323,6 +7831,21 @@ define amdgpu_kernel void @local_singlethread_monotonic_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_monotonic_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -7534,6 +8057,21 @@ define amdgpu_kernel void @local_singlethread_acquire_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_acquire_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -7745,6 +8283,21 @@ define amdgpu_kernel void @local_singlethread_release_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_release_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -7956,6 +8509,21 @@ define amdgpu_kernel void @local_singlethread_acq_rel_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_acq_rel_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -8167,6 +8735,21 @@ define amdgpu_kernel void @local_singlethread_seq_cst_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_seq_cst_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -8340,6 +8923,18 @@ define amdgpu_kernel void @local_singlethread_one_as_unordered_load(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_one_as_unordered_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: ds_load_b32 v1, v0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
%val = load atomic i32, ptr addrspace(3) %in syncscope("singlethread-one-as") unordered, align 4
@@ -8511,6 +9106,18 @@ define amdgpu_kernel void @local_singlethread_one_as_monotonic_load(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_one_as_monotonic_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: ds_load_b32 v1, v0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
%val = load atomic i32, ptr addrspace(3) %in syncscope("singlethread-one-as") monotonic, align 4
@@ -8682,6 +9289,18 @@ define amdgpu_kernel void @local_singlethread_one_as_acquire_load(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_one_as_acquire_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: ds_load_b32 v1, v0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
%val = load atomic i32, ptr addrspace(3) %in syncscope("singlethread-one-as") acquire, align 4
@@ -8853,6 +9472,18 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_load(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_one_as_seq_cst_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: ds_load_b32 v1, v0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
%val = load atomic i32, ptr addrspace(3) %in syncscope("singlethread-one-as") seq_cst, align 4
@@ -8994,6 +9625,16 @@ define amdgpu_kernel void @local_singlethread_one_as_unordered_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_one_as_unordered_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic i32 %in, ptr addrspace(3) %out syncscope("singlethread-one-as") unordered, align 4
@@ -9134,6 +9775,16 @@ define amdgpu_kernel void @local_singlethread_one_as_monotonic_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_one_as_monotonic_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic i32 %in, ptr addrspace(3) %out syncscope("singlethread-one-as") monotonic, align 4
@@ -9274,6 +9925,16 @@ define amdgpu_kernel void @local_singlethread_one_as_release_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_one_as_release_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic i32 %in, ptr addrspace(3) %out syncscope("singlethread-one-as") release, align 4
@@ -9414,6 +10075,16 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_one_as_seq_cst_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic i32 %in, ptr addrspace(3) %out syncscope("singlethread-one-as") seq_cst, align 4
@@ -9554,6 +10225,16 @@ define amdgpu_kernel void @local_singlethread_one_as_monotonic_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_one_as_monotonic_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("singlethread-one-as") monotonic
@@ -9694,6 +10375,16 @@ define amdgpu_kernel void @local_singlethread_one_as_acquire_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_one_as_acquire_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("singlethread-one-as") acquire
@@ -9834,6 +10525,16 @@ define amdgpu_kernel void @local_singlethread_one_as_release_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_one_as_release_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("singlethread-one-as") release
@@ -9974,6 +10675,16 @@ define amdgpu_kernel void @local_singlethread_one_as_acq_rel_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_one_as_acq_rel_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("singlethread-one-as") acq_rel
@@ -10114,6 +10825,16 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_one_as_seq_cst_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("singlethread-one-as") seq_cst
@@ -10296,6 +11017,19 @@ define amdgpu_kernel void @local_singlethread_one_as_acquire_ret_atomicrmw(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_one_as_acquire_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("singlethread-one-as") acquire
@@ -10479,6 +11213,19 @@ define amdgpu_kernel void @local_singlethread_one_as_acq_rel_ret_atomicrmw(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_one_as_acq_rel_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("singlethread-one-as") acq_rel
@@ -10662,6 +11409,19 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_ret_atomicrmw(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_one_as_seq_cst_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("singlethread-one-as") seq_cst
@@ -10829,6 +11589,18 @@ define amdgpu_kernel void @local_singlethread_one_as_monotonic_monotonic_cmpxchg
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_one_as_monotonic_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -10996,6 +11768,18 @@ define amdgpu_kernel void @local_singlethread_one_as_acquire_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_one_as_acquire_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -11163,6 +11947,18 @@ define amdgpu_kernel void @local_singlethread_one_as_release_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_one_as_release_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -11330,6 +12126,18 @@ define amdgpu_kernel void @local_singlethread_one_as_acq_rel_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_one_as_acq_rel_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -11497,6 +12305,18 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_one_as_seq_cst_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -11664,6 +12484,18 @@ define amdgpu_kernel void @local_singlethread_one_as_monotonic_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_one_as_monotonic_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -11831,6 +12663,18 @@ define amdgpu_kernel void @local_singlethread_one_as_acquire_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_one_as_acquire_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -11998,6 +12842,18 @@ define amdgpu_kernel void @local_singlethread_one_as_release_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_one_as_release_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -12165,6 +13021,18 @@ define amdgpu_kernel void @local_singlethread_one_as_acq_rel_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_one_as_acq_rel_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -12332,6 +13200,18 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_one_as_seq_cst_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -12499,6 +13379,18 @@ define amdgpu_kernel void @local_singlethread_one_as_monotonic_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_one_as_monotonic_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -12666,6 +13558,18 @@ define amdgpu_kernel void @local_singlethread_one_as_acquire_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_one_as_acquire_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -12833,6 +13737,18 @@ define amdgpu_kernel void @local_singlethread_one_as_release_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_one_as_release_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -13000,6 +13916,18 @@ define amdgpu_kernel void @local_singlethread_one_as_acq_rel_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_one_as_acq_rel_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -13167,6 +14095,18 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_one_as_seq_cst_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -13376,6 +14316,21 @@ define amdgpu_kernel void @local_singlethread_one_as_monotonic_monotonic_ret_cmp
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_one_as_monotonic_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -13587,6 +14542,21 @@ define amdgpu_kernel void @local_singlethread_one_as_acquire_monotonic_ret_cmpxc
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_one_as_acquire_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -13798,6 +14768,21 @@ define amdgpu_kernel void @local_singlethread_one_as_release_monotonic_ret_cmpxc
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_one_as_release_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -14009,6 +14994,21 @@ define amdgpu_kernel void @local_singlethread_one_as_acq_rel_monotonic_ret_cmpxc
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_one_as_acq_rel_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -14220,6 +15220,21 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_monotonic_ret_cmpxc
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_one_as_seq_cst_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -14431,6 +15446,21 @@ define amdgpu_kernel void @local_singlethread_one_as_monotonic_acquire_ret_cmpxc
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_one_as_monotonic_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -14642,6 +15672,21 @@ define amdgpu_kernel void @local_singlethread_one_as_acquire_acquire_ret_cmpxchg
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_one_as_acquire_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -14853,6 +15898,21 @@ define amdgpu_kernel void @local_singlethread_one_as_release_acquire_ret_cmpxchg
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_one_as_release_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -15064,6 +16124,21 @@ define amdgpu_kernel void @local_singlethread_one_as_acq_rel_acquire_ret_cmpxchg
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_one_as_acq_rel_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -15275,6 +16350,21 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_acquire_ret_cmpxchg
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_one_as_seq_cst_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -15486,6 +16576,21 @@ define amdgpu_kernel void @local_singlethread_one_as_monotonic_seq_cst_ret_cmpxc
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_one_as_monotonic_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -15697,6 +16802,21 @@ define amdgpu_kernel void @local_singlethread_one_as_acquire_seq_cst_ret_cmpxchg
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_one_as_acquire_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -15908,6 +17028,21 @@ define amdgpu_kernel void @local_singlethread_one_as_release_seq_cst_ret_cmpxchg
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_one_as_release_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -16119,6 +17254,21 @@ define amdgpu_kernel void @local_singlethread_one_as_acq_rel_seq_cst_ret_cmpxchg
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_one_as_acq_rel_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -16330,6 +17480,21 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_seq_cst_ret_cmpxchg
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_singlethread_one_as_seq_cst_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-system.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-system.ll
index 74a297241d851..fdf69a5998652 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-system.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-system.ll
@@ -12,6 +12,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250-CU %s
define amdgpu_kernel void @local_system_unordered_load(
; GFX6-LABEL: local_system_unordered_load:
@@ -177,6 +178,18 @@ define amdgpu_kernel void @local_system_unordered_load(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_unordered_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: ds_load_b32 v1, v0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
%val = load atomic i32, ptr addrspace(3) %in unordered, align 4
@@ -348,6 +361,18 @@ define amdgpu_kernel void @local_system_monotonic_load(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_monotonic_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: ds_load_b32 v1, v0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
%val = load atomic i32, ptr addrspace(3) %in monotonic, align 4
@@ -524,6 +549,18 @@ define amdgpu_kernel void @local_system_acquire_load(
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_acquire_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: ds_load_b32 v1, v0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
%val = load atomic i32, ptr addrspace(3) %in acquire, align 4
@@ -718,6 +755,19 @@ define amdgpu_kernel void @local_system_seq_cst_load(
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_seq_cst_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_load_b32 v1, v0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
%val = load atomic i32, ptr addrspace(3) %in seq_cst, align 4
@@ -859,6 +909,16 @@ define amdgpu_kernel void @local_system_unordered_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_unordered_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic i32 %in, ptr addrspace(3) %out unordered, align 4
@@ -999,6 +1059,16 @@ define amdgpu_kernel void @local_system_monotonic_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_monotonic_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic i32 %in, ptr addrspace(3) %out monotonic, align 4
@@ -1157,6 +1227,17 @@ define amdgpu_kernel void @local_system_release_store(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_release_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic i32 %in, ptr addrspace(3) %out release, align 4
@@ -1315,6 +1396,17 @@ define amdgpu_kernel void @local_system_seq_cst_store(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_seq_cst_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic i32 %in, ptr addrspace(3) %out seq_cst, align 4
@@ -1455,6 +1547,16 @@ define amdgpu_kernel void @local_system_monotonic_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_monotonic_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in monotonic
@@ -1611,6 +1713,17 @@ define amdgpu_kernel void @local_system_acquire_atomicrmw(
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_acquire_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in acquire
@@ -1769,6 +1882,17 @@ define amdgpu_kernel void @local_system_release_atomicrmw(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_release_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in release
@@ -1943,6 +2067,18 @@ define amdgpu_kernel void @local_system_acq_rel_atomicrmw(
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_acq_rel_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in acq_rel
@@ -2117,6 +2253,18 @@ define amdgpu_kernel void @local_system_seq_cst_atomicrmw(
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_seq_cst_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in seq_cst
@@ -2304,6 +2452,19 @@ define amdgpu_kernel void @local_system_acquire_ret_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_acquire_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in acquire
@@ -2510,6 +2671,20 @@ define amdgpu_kernel void @local_system_acq_rel_ret_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_acq_rel_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in acq_rel
@@ -2716,6 +2891,20 @@ define amdgpu_kernel void @local_system_seq_cst_ret_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_seq_cst_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in seq_cst
@@ -2883,6 +3072,18 @@ define amdgpu_kernel void @local_system_monotonic_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_monotonic_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -3066,6 +3267,19 @@ define amdgpu_kernel void @local_system_acquire_monotonic_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_acquire_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -3251,6 +3465,19 @@ define amdgpu_kernel void @local_system_release_monotonic_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_release_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -3452,6 +3679,20 @@ define amdgpu_kernel void @local_system_acq_rel_monotonic_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_acq_rel_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -3653,6 +3894,20 @@ define amdgpu_kernel void @local_system_seq_cst_monotonic_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_seq_cst_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -3836,6 +4091,19 @@ define amdgpu_kernel void @local_system_monotonic_acquire_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_monotonic_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -4019,6 +4287,19 @@ define amdgpu_kernel void @local_system_acquire_acquire_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_acquire_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -4220,6 +4501,20 @@ define amdgpu_kernel void @local_system_release_acquire_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_release_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -4421,6 +4716,20 @@ define amdgpu_kernel void @local_system_acq_rel_acquire_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_acq_rel_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -4622,6 +4931,20 @@ define amdgpu_kernel void @local_system_seq_cst_acquire_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_seq_cst_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -4823,6 +5146,20 @@ define amdgpu_kernel void @local_system_monotonic_seq_cst_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_monotonic_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -5024,6 +5361,20 @@ define amdgpu_kernel void @local_system_acquire_seq_cst_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_acquire_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -5225,6 +5576,20 @@ define amdgpu_kernel void @local_system_release_seq_cst_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_release_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -5426,6 +5791,20 @@ define amdgpu_kernel void @local_system_acq_rel_seq_cst_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_acq_rel_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -5627,6 +6006,20 @@ define amdgpu_kernel void @local_system_seq_cst_seq_cst_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_seq_cst_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -5836,6 +6229,21 @@ define amdgpu_kernel void @local_system_monotonic_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_monotonic_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -6052,6 +6460,21 @@ define amdgpu_kernel void @local_system_acquire_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_acquire_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -6281,6 +6704,22 @@ define amdgpu_kernel void @local_system_release_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_release_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -6515,6 +6954,22 @@ define amdgpu_kernel void @local_system_acq_rel_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_acq_rel_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -6749,6 +7204,22 @@ define amdgpu_kernel void @local_system_seq_cst_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_seq_cst_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -6965,6 +7436,21 @@ define amdgpu_kernel void @local_system_monotonic_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_monotonic_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -7181,6 +7667,21 @@ define amdgpu_kernel void @local_system_acquire_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_acquire_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -7415,6 +7916,22 @@ define amdgpu_kernel void @local_system_release_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_release_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -7649,6 +8166,22 @@ define amdgpu_kernel void @local_system_acq_rel_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_acq_rel_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -7883,6 +8416,22 @@ define amdgpu_kernel void @local_system_seq_cst_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_seq_cst_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -8117,6 +8666,22 @@ define amdgpu_kernel void @local_system_monotonic_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_monotonic_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -8351,6 +8916,22 @@ define amdgpu_kernel void @local_system_acquire_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_acquire_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -8585,6 +9166,22 @@ define amdgpu_kernel void @local_system_release_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_release_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -8819,6 +9416,22 @@ define amdgpu_kernel void @local_system_acq_rel_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_acq_rel_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -9053,6 +9666,22 @@ define amdgpu_kernel void @local_system_seq_cst_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_seq_cst_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -9226,6 +9855,18 @@ define amdgpu_kernel void @local_system_one_as_unordered_load(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_one_as_unordered_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: ds_load_b32 v1, v0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
%val = load atomic i32, ptr addrspace(3) %in syncscope("one-as") unordered, align 4
@@ -9397,6 +10038,18 @@ define amdgpu_kernel void @local_system_one_as_monotonic_load(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_one_as_monotonic_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: ds_load_b32 v1, v0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
%val = load atomic i32, ptr addrspace(3) %in syncscope("one-as") monotonic, align 4
@@ -9568,6 +10221,18 @@ define amdgpu_kernel void @local_system_one_as_acquire_load(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_one_as_acquire_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: ds_load_b32 v1, v0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
%val = load atomic i32, ptr addrspace(3) %in syncscope("one-as") acquire, align 4
@@ -9739,6 +10404,18 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_load(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_one_as_seq_cst_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: ds_load_b32 v1, v0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
%val = load atomic i32, ptr addrspace(3) %in syncscope("one-as") seq_cst, align 4
@@ -9880,6 +10557,16 @@ define amdgpu_kernel void @local_system_one_as_unordered_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_one_as_unordered_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic i32 %in, ptr addrspace(3) %out syncscope("one-as") unordered, align 4
@@ -10020,6 +10707,16 @@ define amdgpu_kernel void @local_system_one_as_monotonic_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_one_as_monotonic_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic i32 %in, ptr addrspace(3) %out syncscope("one-as") monotonic, align 4
@@ -10160,6 +10857,16 @@ define amdgpu_kernel void @local_system_one_as_release_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_one_as_release_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic i32 %in, ptr addrspace(3) %out syncscope("one-as") release, align 4
@@ -10300,6 +11007,16 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_one_as_seq_cst_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic i32 %in, ptr addrspace(3) %out syncscope("one-as") seq_cst, align 4
@@ -10440,6 +11157,16 @@ define amdgpu_kernel void @local_system_one_as_monotonic_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_one_as_monotonic_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("one-as") monotonic
@@ -10580,6 +11307,16 @@ define amdgpu_kernel void @local_system_one_as_acquire_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_one_as_acquire_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("one-as") acquire
@@ -10720,6 +11457,16 @@ define amdgpu_kernel void @local_system_one_as_release_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_one_as_release_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("one-as") release
@@ -10860,6 +11607,16 @@ define amdgpu_kernel void @local_system_one_as_acq_rel_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_one_as_acq_rel_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("one-as") acq_rel
@@ -11000,6 +11757,16 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_one_as_seq_cst_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("one-as") seq_cst
@@ -11182,6 +11949,19 @@ define amdgpu_kernel void @local_system_one_as_acquire_ret_atomicrmw(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_one_as_acquire_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("one-as") acquire
@@ -11365,6 +12145,19 @@ define amdgpu_kernel void @local_system_one_as_acq_rel_ret_atomicrmw(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_one_as_acq_rel_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("one-as") acq_rel
@@ -11548,6 +12341,19 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_ret_atomicrmw(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_one_as_seq_cst_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("one-as") seq_cst
@@ -11715,6 +12521,18 @@ define amdgpu_kernel void @local_system_one_as_monotonic_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_one_as_monotonic_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -11882,6 +12700,18 @@ define amdgpu_kernel void @local_system_one_as_acquire_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_one_as_acquire_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -12049,6 +12879,18 @@ define amdgpu_kernel void @local_system_one_as_release_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_one_as_release_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -12216,6 +13058,18 @@ define amdgpu_kernel void @local_system_one_as_acq_rel_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_one_as_acq_rel_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -12383,6 +13237,18 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_one_as_seq_cst_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -12550,6 +13416,18 @@ define amdgpu_kernel void @local_system_one_as_monotonic_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_one_as_monotonic_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -12717,6 +13595,18 @@ define amdgpu_kernel void @local_system_one_as_acquire_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_one_as_acquire_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -12884,6 +13774,18 @@ define amdgpu_kernel void @local_system_one_as_release_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_one_as_release_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -13051,6 +13953,18 @@ define amdgpu_kernel void @local_system_one_as_acq_rel_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_one_as_acq_rel_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -13218,6 +14132,18 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_one_as_seq_cst_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -13385,6 +14311,18 @@ define amdgpu_kernel void @local_system_one_as_monotonic_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_one_as_monotonic_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -13552,6 +14490,18 @@ define amdgpu_kernel void @local_system_one_as_acquire_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_one_as_acquire_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -13719,6 +14669,18 @@ define amdgpu_kernel void @local_system_one_as_release_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_one_as_release_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -13886,6 +14848,18 @@ define amdgpu_kernel void @local_system_one_as_acq_rel_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_one_as_acq_rel_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -14053,6 +15027,18 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_one_as_seq_cst_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -14262,6 +15248,21 @@ define amdgpu_kernel void @local_system_one_as_monotonic_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_one_as_monotonic_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -14473,6 +15474,21 @@ define amdgpu_kernel void @local_system_one_as_acquire_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_one_as_acquire_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -14684,6 +15700,21 @@ define amdgpu_kernel void @local_system_one_as_release_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_one_as_release_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -14895,6 +15926,21 @@ define amdgpu_kernel void @local_system_one_as_acq_rel_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_one_as_acq_rel_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -15106,6 +16152,21 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_one_as_seq_cst_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -15317,6 +16378,21 @@ define amdgpu_kernel void @local_system_one_as_monotonic_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_one_as_monotonic_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -15528,6 +16604,21 @@ define amdgpu_kernel void @local_system_one_as_acquire_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_one_as_acquire_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -15739,6 +16830,21 @@ define amdgpu_kernel void @local_system_one_as_release_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_one_as_release_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -15950,6 +17056,21 @@ define amdgpu_kernel void @local_system_one_as_acq_rel_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_one_as_acq_rel_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -16161,6 +17282,21 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_one_as_seq_cst_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -16372,6 +17508,21 @@ define amdgpu_kernel void @local_system_one_as_monotonic_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_one_as_monotonic_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -16583,6 +17734,21 @@ define amdgpu_kernel void @local_system_one_as_acquire_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_one_as_acquire_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -16794,6 +17960,21 @@ define amdgpu_kernel void @local_system_one_as_release_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_one_as_release_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -17005,6 +18186,21 @@ define amdgpu_kernel void @local_system_one_as_acq_rel_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_one_as_acq_rel_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -17216,6 +18412,21 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_system_one_as_seq_cst_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-volatile.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-volatile.ll
index 5e5e3bf83d610..88cba0bddf5d7 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-volatile.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-volatile.ll
@@ -8,6 +8,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250-CU %s
define amdgpu_kernel void @local_volatile_load_0(
; GFX6-LABEL: local_volatile_load_0:
@@ -141,6 +142,18 @@ define amdgpu_kernel void @local_volatile_load_0(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_volatile_load_0:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: ds_load_b32 v1, v1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(1) %out) {
entry:
%val = load volatile i32, ptr addrspace(3) %in, align 4
@@ -308,6 +321,22 @@ define amdgpu_kernel void @local_volatile_load_1(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_volatile_load_1:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, v0
+; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-CU-NEXT: s_mov_b32 s2, 0x3ff
+; GFX1250-CU-NEXT: v_and_b32_e64 v1, v1, s2
+; GFX1250-CU-NEXT: s_mov_b32 s2, 2
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_lshl_add_u32 v1, v1, s2, s3
+; GFX1250-CU-NEXT: ds_load_b32 v1, v1
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(1) %out) {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -429,6 +458,18 @@ define amdgpu_kernel void @local_volatile_store_0(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_volatile_store_0:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[2:3], 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(3) %out) {
entry:
%val = load i32, ptr addrspace(1) %in, align 4
@@ -570,6 +611,22 @@ define amdgpu_kernel void @local_volatile_store_1(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_volatile_store_1:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[0:1], 0x0
+; GFX1250-CU-NEXT: s_wait_xcnt 0x0
+; GFX1250-CU-NEXT: s_mov_b32 s1, 0x3ff
+; GFX1250-CU-NEXT: v_and_b32_e64 v0, v0, s1
+; GFX1250-CU-NEXT: s_mov_b32 s1, 2
+; GFX1250-CU-NEXT: v_lshl_add_u32 v0, v0, s1, s2
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(3) %out) {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -698,6 +755,18 @@ define amdgpu_kernel void @local_volatile_workgroup_acquire_load(
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_volatile_workgroup_acquire_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: ds_load_b32 v1, v0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
%val = load atomic volatile i32, ptr addrspace(3) %in syncscope("workgroup") acquire, align 4
@@ -813,6 +882,17 @@ define amdgpu_kernel void @local_volatile_workgroup_release_store(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_volatile_workgroup_release_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic volatile i32 %in, ptr addrspace(3) %out syncscope("workgroup") release, align 4
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-wavefront.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-wavefront.ll
index b24622a48a16b..b8ad75049aff8 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-wavefront.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-wavefront.ll
@@ -12,6 +12,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250-CU %s
define amdgpu_kernel void @local_wavefront_unordered_load(
; GFX6-LABEL: local_wavefront_unordered_load:
@@ -177,6 +178,18 @@ define amdgpu_kernel void @local_wavefront_unordered_load(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_unordered_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: ds_load_b32 v1, v0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
%val = load atomic i32, ptr addrspace(3) %in syncscope("wavefront") unordered, align 4
@@ -348,6 +361,18 @@ define amdgpu_kernel void @local_wavefront_monotonic_load(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_monotonic_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: ds_load_b32 v1, v0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
%val = load atomic i32, ptr addrspace(3) %in syncscope("wavefront") monotonic, align 4
@@ -519,6 +544,18 @@ define amdgpu_kernel void @local_wavefront_acquire_load(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_acquire_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: ds_load_b32 v1, v0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
%val = load atomic i32, ptr addrspace(3) %in syncscope("wavefront") acquire, align 4
@@ -690,6 +727,18 @@ define amdgpu_kernel void @local_wavefront_seq_cst_load(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_seq_cst_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: ds_load_b32 v1, v0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
%val = load atomic i32, ptr addrspace(3) %in syncscope("wavefront") seq_cst, align 4
@@ -831,6 +880,16 @@ define amdgpu_kernel void @local_wavefront_unordered_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_unordered_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic i32 %in, ptr addrspace(3) %out syncscope("wavefront") unordered, align 4
@@ -971,6 +1030,16 @@ define amdgpu_kernel void @local_wavefront_monotonic_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_monotonic_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic i32 %in, ptr addrspace(3) %out syncscope("wavefront") monotonic, align 4
@@ -1111,6 +1180,16 @@ define amdgpu_kernel void @local_wavefront_release_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_release_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic i32 %in, ptr addrspace(3) %out syncscope("wavefront") release, align 4
@@ -1251,6 +1330,16 @@ define amdgpu_kernel void @local_wavefront_seq_cst_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_seq_cst_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic i32 %in, ptr addrspace(3) %out syncscope("wavefront") seq_cst, align 4
@@ -1391,6 +1480,16 @@ define amdgpu_kernel void @local_wavefront_monotonic_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_monotonic_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("wavefront") monotonic
@@ -1531,6 +1630,16 @@ define amdgpu_kernel void @local_wavefront_acquire_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_acquire_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("wavefront") acquire
@@ -1671,6 +1780,16 @@ define amdgpu_kernel void @local_wavefront_release_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_release_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("wavefront") release
@@ -1811,6 +1930,16 @@ define amdgpu_kernel void @local_wavefront_acq_rel_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_acq_rel_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("wavefront") acq_rel
@@ -1951,6 +2080,16 @@ define amdgpu_kernel void @local_wavefront_seq_cst_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_seq_cst_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("wavefront") seq_cst
@@ -2133,6 +2272,19 @@ define amdgpu_kernel void @local_wavefront_acquire_ret_atomicrmw(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_acquire_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("wavefront") acquire
@@ -2316,6 +2468,19 @@ define amdgpu_kernel void @local_wavefront_acq_rel_ret_atomicrmw(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_acq_rel_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("wavefront") acq_rel
@@ -2499,6 +2664,19 @@ define amdgpu_kernel void @local_wavefront_seq_cst_ret_atomicrmw(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_seq_cst_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("wavefront") seq_cst
@@ -2666,6 +2844,18 @@ define amdgpu_kernel void @local_wavefront_monotonic_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_monotonic_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -2833,6 +3023,18 @@ define amdgpu_kernel void @local_wavefront_acquire_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_acquire_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -3000,6 +3202,18 @@ define amdgpu_kernel void @local_wavefront_release_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_release_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -3167,6 +3381,18 @@ define amdgpu_kernel void @local_wavefront_acq_rel_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_acq_rel_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -3334,6 +3560,18 @@ define amdgpu_kernel void @local_wavefront_seq_cst_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_seq_cst_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -3501,6 +3739,18 @@ define amdgpu_kernel void @local_wavefront_monotonic_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_monotonic_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -3668,6 +3918,18 @@ define amdgpu_kernel void @local_wavefront_acquire_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_acquire_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -3835,6 +4097,18 @@ define amdgpu_kernel void @local_wavefront_release_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_release_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -4002,6 +4276,18 @@ define amdgpu_kernel void @local_wavefront_acq_rel_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_acq_rel_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -4169,6 +4455,18 @@ define amdgpu_kernel void @local_wavefront_seq_cst_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_seq_cst_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -4336,6 +4634,18 @@ define amdgpu_kernel void @local_wavefront_monotonic_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_monotonic_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -4503,6 +4813,18 @@ define amdgpu_kernel void @local_wavefront_acquire_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_acquire_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -4670,6 +4992,18 @@ define amdgpu_kernel void @local_wavefront_release_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_release_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -4837,6 +5171,18 @@ define amdgpu_kernel void @local_wavefront_acq_rel_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_acq_rel_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -5004,6 +5350,18 @@ define amdgpu_kernel void @local_wavefront_seq_cst_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_seq_cst_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -5213,6 +5571,21 @@ define amdgpu_kernel void @local_wavefront_monotonic_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_monotonic_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -5424,6 +5797,21 @@ define amdgpu_kernel void @local_wavefront_acquire_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_acquire_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -5635,6 +6023,21 @@ define amdgpu_kernel void @local_wavefront_release_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_release_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -5846,6 +6249,21 @@ define amdgpu_kernel void @local_wavefront_acq_rel_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_acq_rel_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -6057,6 +6475,21 @@ define amdgpu_kernel void @local_wavefront_seq_cst_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_seq_cst_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -6268,6 +6701,21 @@ define amdgpu_kernel void @local_wavefront_monotonic_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_monotonic_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -6479,6 +6927,21 @@ define amdgpu_kernel void @local_wavefront_acquire_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_acquire_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -6690,6 +7153,21 @@ define amdgpu_kernel void @local_wavefront_release_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_release_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -6901,6 +7379,21 @@ define amdgpu_kernel void @local_wavefront_acq_rel_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_acq_rel_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -7112,6 +7605,21 @@ define amdgpu_kernel void @local_wavefront_seq_cst_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_seq_cst_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -7323,6 +7831,21 @@ define amdgpu_kernel void @local_wavefront_monotonic_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_monotonic_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -7534,6 +8057,21 @@ define amdgpu_kernel void @local_wavefront_acquire_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_acquire_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -7745,6 +8283,21 @@ define amdgpu_kernel void @local_wavefront_release_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_release_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -7956,6 +8509,21 @@ define amdgpu_kernel void @local_wavefront_acq_rel_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_acq_rel_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -8167,6 +8735,21 @@ define amdgpu_kernel void @local_wavefront_seq_cst_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_seq_cst_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -8340,6 +8923,18 @@ define amdgpu_kernel void @local_wavefront_one_as_unordered_load(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_one_as_unordered_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: ds_load_b32 v1, v0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
%val = load atomic i32, ptr addrspace(3) %in syncscope("wavefront-one-as") unordered, align 4
@@ -8511,6 +9106,18 @@ define amdgpu_kernel void @local_wavefront_one_as_monotonic_load(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_one_as_monotonic_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: ds_load_b32 v1, v0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
%val = load atomic i32, ptr addrspace(3) %in syncscope("wavefront-one-as") monotonic, align 4
@@ -8682,6 +9289,18 @@ define amdgpu_kernel void @local_wavefront_one_as_acquire_load(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_one_as_acquire_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: ds_load_b32 v1, v0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
%val = load atomic i32, ptr addrspace(3) %in syncscope("wavefront-one-as") acquire, align 4
@@ -8853,6 +9472,18 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_load(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_one_as_seq_cst_load:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: ds_load_b32 v1, v0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
%val = load atomic i32, ptr addrspace(3) %in syncscope("wavefront-one-as") seq_cst, align 4
@@ -8994,6 +9625,16 @@ define amdgpu_kernel void @local_wavefront_one_as_unordered_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_one_as_unordered_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic i32 %in, ptr addrspace(3) %out syncscope("wavefront-one-as") unordered, align 4
@@ -9134,6 +9775,16 @@ define amdgpu_kernel void @local_wavefront_one_as_monotonic_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_one_as_monotonic_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic i32 %in, ptr addrspace(3) %out syncscope("wavefront-one-as") monotonic, align 4
@@ -9274,6 +9925,16 @@ define amdgpu_kernel void @local_wavefront_one_as_release_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_one_as_release_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic i32 %in, ptr addrspace(3) %out syncscope("wavefront-one-as") release, align 4
@@ -9414,6 +10075,16 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_store(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_one_as_seq_cst_store:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic i32 %in, ptr addrspace(3) %out syncscope("wavefront-one-as") seq_cst, align 4
@@ -9554,6 +10225,16 @@ define amdgpu_kernel void @local_wavefront_one_as_monotonic_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_one_as_monotonic_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("wavefront-one-as") monotonic
@@ -9694,6 +10375,16 @@ define amdgpu_kernel void @local_wavefront_one_as_acquire_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_one_as_acquire_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("wavefront-one-as") acquire
@@ -9834,6 +10525,16 @@ define amdgpu_kernel void @local_wavefront_one_as_release_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_one_as_release_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("wavefront-one-as") release
@@ -9974,6 +10675,16 @@ define amdgpu_kernel void @local_wavefront_one_as_acq_rel_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_one_as_acq_rel_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("wavefront-one-as") acq_rel
@@ -10114,6 +10825,16 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_atomicrmw(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_one_as_seq_cst_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("wavefront-one-as") seq_cst
@@ -10296,6 +11017,19 @@ define amdgpu_kernel void @local_wavefront_one_as_acquire_ret_atomicrmw(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_one_as_acquire_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("wavefront-one-as") acquire
@@ -10479,6 +11213,19 @@ define amdgpu_kernel void @local_wavefront_one_as_acq_rel_ret_atomicrmw(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_one_as_acq_rel_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("wavefront-one-as") acq_rel
@@ -10662,6 +11409,19 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_ret_atomicrmw(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_one_as_seq_cst_ret_atomicrmw:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("wavefront-one-as") seq_cst
@@ -10829,6 +11589,18 @@ define amdgpu_kernel void @local_wavefront_one_as_monotonic_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_one_as_monotonic_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -10996,6 +11768,18 @@ define amdgpu_kernel void @local_wavefront_one_as_acquire_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_one_as_acquire_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -11163,6 +11947,18 @@ define amdgpu_kernel void @local_wavefront_one_as_release_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_one_as_release_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -11330,6 +12126,18 @@ define amdgpu_kernel void @local_wavefront_one_as_acq_rel_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_one_as_acq_rel_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -11497,6 +12305,18 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_monotonic_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_one_as_seq_cst_monotonic_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -11664,6 +12484,18 @@ define amdgpu_kernel void @local_wavefront_one_as_monotonic_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_one_as_monotonic_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -11831,6 +12663,18 @@ define amdgpu_kernel void @local_wavefront_one_as_acquire_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_one_as_acquire_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -11998,6 +12842,18 @@ define amdgpu_kernel void @local_wavefront_one_as_release_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_one_as_release_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -12165,6 +13021,18 @@ define amdgpu_kernel void @local_wavefront_one_as_acq_rel_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_one_as_acq_rel_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -12332,6 +13200,18 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_acquire_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_one_as_seq_cst_acquire_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -12499,6 +13379,18 @@ define amdgpu_kernel void @local_wavefront_one_as_monotonic_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_one_as_monotonic_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -12666,6 +13558,18 @@ define amdgpu_kernel void @local_wavefront_one_as_acquire_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_one_as_acquire_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -12833,6 +13737,18 @@ define amdgpu_kernel void @local_wavefront_one_as_release_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_one_as_release_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -13000,6 +13916,18 @@ define amdgpu_kernel void @local_wavefront_one_as_acq_rel_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_one_as_acq_rel_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -13167,6 +14095,18 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_seq_cst_cmpxchg(
; GFX12-CU-NEXT: v_mov_b32_e32 v2, s0
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_one_as_seq_cst_seq_cst_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -13376,6 +14316,21 @@ define amdgpu_kernel void @local_wavefront_one_as_monotonic_monotonic_ret_cmpxch
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_one_as_monotonic_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -13587,6 +14542,21 @@ define amdgpu_kernel void @local_wavefront_one_as_acquire_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_one_as_acquire_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -13798,6 +14768,21 @@ define amdgpu_kernel void @local_wavefront_one_as_release_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_one_as_release_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -14009,6 +14994,21 @@ define amdgpu_kernel void @local_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -14220,6 +15220,21 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -14431,6 +15446,21 @@ define amdgpu_kernel void @local_wavefront_one_as_monotonic_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_one_as_monotonic_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -14642,6 +15672,21 @@ define amdgpu_kernel void @local_wavefront_one_as_acquire_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_one_as_acquire_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -14853,6 +15898,21 @@ define amdgpu_kernel void @local_wavefront_one_as_release_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_one_as_release_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -15064,6 +16124,21 @@ define amdgpu_kernel void @local_wavefront_one_as_acq_rel_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_one_as_acq_rel_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -15275,6 +16350,21 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_one_as_seq_cst_acquire_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -15486,6 +16576,21 @@ define amdgpu_kernel void @local_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -15697,6 +16802,21 @@ define amdgpu_kernel void @local_wavefront_one_as_acquire_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_one_as_acquire_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -15908,6 +17028,21 @@ define amdgpu_kernel void @local_wavefront_one_as_release_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_one_as_release_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -16119,6 +17254,21 @@ define amdgpu_kernel void @local_wavefront_one_as_acq_rel_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_one_as_acq_rel_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -16330,6 +17480,21 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
+;
+; GFX1250-CU-LABEL: local_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg:
+; GFX1250-CU: ; %bb.0: ; %entry
+; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-CU-NEXT: s_wait_dscnt 0x0
+; GFX1250-CU-NEXT: ds_store_b32 v0, v1
+; GFX1250-CU-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
>From 7f7b1bbc3f017bb93ada95a48cc400d4ec9a297d Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Thu, 21 Aug 2025 12:43:36 +0200
Subject: [PATCH 2/2] Drop -CU suffix
---
.../memory-legalizer-fence-mmra-global.ll | 294 +-
.../memory-legalizer-fence-mmra-local.ll | 152 +-
.../CodeGen/AMDGPU/memory-legalizer-fence.ll | 398 +-
.../AMDGPU/memory-legalizer-flat-agent.ll | 3118 ++++++++--------
.../AMDGPU/memory-legalizer-flat-lastuse.ll | 94 +-
.../memory-legalizer-flat-nontemporal.ll | 120 +-
.../memory-legalizer-flat-singlethread.ll | 2298 ++++++------
.../AMDGPU/memory-legalizer-flat-system.ll | 3234 ++++++++---------
.../AMDGPU/memory-legalizer-flat-volatile.ll | 148 +-
.../AMDGPU/memory-legalizer-flat-wavefront.ll | 2268 ++++++------
.../AMDGPU/memory-legalizer-flat-workgroup.ll | 2270 ++++++------
.../AMDGPU/memory-legalizer-global-agent.ll | 3044 ++++++++--------
.../AMDGPU/memory-legalizer-global-lastuse.ll | 104 +-
.../memory-legalizer-global-nontemporal.ll | 124 +-
.../memory-legalizer-global-singlethread.ll | 2298 ++++++------
.../AMDGPU/memory-legalizer-global-system.ll | 2998 +++++++--------
.../memory-legalizer-global-volatile.ll | 150 +-
.../memory-legalizer-global-wavefront.ll | 2298 ++++++------
.../memory-legalizer-global-workgroup.ll | 2358 ++++++------
.../AMDGPU/memory-legalizer-local-agent.ll | 2238 ++++++------
.../memory-legalizer-local-nontemporal.ll | 128 +-
.../memory-legalizer-local-singlethread.ll | 2146 +++++------
.../AMDGPU/memory-legalizer-local-system.ll | 2238 ++++++------
.../AMDGPU/memory-legalizer-local-volatile.ll | 148 +-
.../memory-legalizer-local-wavefront.ll | 2146 +++++------
.../memory-legalizer-local-workgroup.ll | 2238 ++++++------
26 files changed, 19525 insertions(+), 19525 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-global.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-global.ll
index 20822c71198b6..97d52d5f1f26d 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-global.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-global.ll
@@ -12,7 +12,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250-CU %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250 %s
define amdgpu_kernel void @workgroup_acquire_fence() {
; GFX6-LABEL: workgroup_acquire_fence:
@@ -80,9 +80,9 @@ define amdgpu_kernel void @workgroup_acquire_fence() {
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: workgroup_acquire_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: workgroup_acquire_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup") acquire, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@@ -151,9 +151,9 @@ define amdgpu_kernel void @workgroup_release_fence() {
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: workgroup_release_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: workgroup_release_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup") release, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@@ -227,9 +227,9 @@ define amdgpu_kernel void @workgroup_acq_rel_fence() {
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: workgroup_acq_rel_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: workgroup_acq_rel_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup") acq_rel, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@@ -303,9 +303,9 @@ define amdgpu_kernel void @workgroup_seq_cst_fence() {
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: workgroup_seq_cst_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: workgroup_seq_cst_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup") seq_cst, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@@ -377,9 +377,9 @@ define amdgpu_kernel void @workgroup_one_as_acquire_fence() {
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: workgroup_one_as_acquire_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: workgroup_one_as_acquire_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup-one-as") acquire, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@@ -448,9 +448,9 @@ define amdgpu_kernel void @workgroup_one_as_release_fence() {
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: workgroup_one_as_release_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: workgroup_one_as_release_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup-one-as") release, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@@ -524,9 +524,9 @@ define amdgpu_kernel void @workgroup_one_as_acq_rel_fence() {
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: workgroup_one_as_acq_rel_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: workgroup_one_as_acq_rel_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup-one-as") acq_rel, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@@ -600,9 +600,9 @@ define amdgpu_kernel void @workgroup_one_as_seq_cst_fence() {
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: workgroup_one_as_seq_cst_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: workgroup_one_as_seq_cst_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup-one-as") seq_cst, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@@ -696,12 +696,12 @@ define amdgpu_kernel void @agent_acquire_fence() {
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: agent_acquire_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: agent_acquire_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("agent") acquire, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@@ -785,13 +785,13 @@ define amdgpu_kernel void @agent_release_fence() {
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: agent_release_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: agent_release_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("agent") release, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@@ -891,14 +891,14 @@ define amdgpu_kernel void @agent_acq_rel_fence() {
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: agent_acq_rel_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: agent_acq_rel_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("agent") acq_rel, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@@ -998,14 +998,14 @@ define amdgpu_kernel void @agent_seq_cst_fence() {
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: agent_seq_cst_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: agent_seq_cst_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("agent") seq_cst, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@@ -1099,12 +1099,12 @@ define amdgpu_kernel void @agent_one_as_acquire_fence() {
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: agent_one_as_acquire_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: agent_one_as_acquire_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("agent-one-as") acquire, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@@ -1188,13 +1188,13 @@ define amdgpu_kernel void @agent_one_as_release_fence() {
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: agent_one_as_release_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: agent_one_as_release_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("agent-one-as") release, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@@ -1294,14 +1294,14 @@ define amdgpu_kernel void @agent_one_as_acq_rel_fence() {
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: agent_one_as_acq_rel_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: agent_one_as_acq_rel_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("agent-one-as") acq_rel, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@@ -1401,14 +1401,14 @@ define amdgpu_kernel void @agent_one_as_seq_cst_fence() {
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: agent_one_as_seq_cst_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: agent_one_as_seq_cst_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("agent-one-as") seq_cst, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@@ -1504,12 +1504,12 @@ define amdgpu_kernel void @system_acquire_fence() {
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: system_acquire_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: system_acquire_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
entry:
fence acquire, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@@ -1597,14 +1597,14 @@ define amdgpu_kernel void @system_release_fence() {
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: system_release_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: system_release_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_endpgm
entry:
fence release, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@@ -1710,15 +1710,15 @@ define amdgpu_kernel void @system_acq_rel_fence() {
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: system_acq_rel_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: system_acq_rel_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
entry:
fence acq_rel, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@@ -1824,15 +1824,15 @@ define amdgpu_kernel void @system_seq_cst_fence() {
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: system_seq_cst_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: system_seq_cst_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
entry:
fence seq_cst, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@@ -1928,12 +1928,12 @@ define amdgpu_kernel void @system_one_as_acquire_fence() {
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: system_one_as_acquire_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: system_one_as_acquire_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("one-as") acquire, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@@ -2021,14 +2021,14 @@ define amdgpu_kernel void @system_one_as_release_fence() {
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: system_one_as_release_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: system_one_as_release_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("one-as") release, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@@ -2134,15 +2134,15 @@ define amdgpu_kernel void @system_one_as_acq_rel_fence() {
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: system_one_as_acq_rel_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: system_one_as_acq_rel_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("one-as") acq_rel, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@@ -2248,15 +2248,15 @@ define amdgpu_kernel void @system_one_as_seq_cst_fence() {
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: system_one_as_seq_cst_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: system_one_as_seq_cst_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("one-as") seq_cst, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-local.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-local.ll
index 767dbc1432242..cc42428e1aa06 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-local.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-local.ll
@@ -12,7 +12,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250-CU %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250 %s
define amdgpu_kernel void @workgroup_acquire_fence() {
; GFX6-LABEL: workgroup_acquire_fence:
@@ -78,10 +78,10 @@ define amdgpu_kernel void @workgroup_acquire_fence() {
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: workgroup_acquire_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: workgroup_acquire_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup") acquire, !mmra !{!"amdgpu-synchronize-as", !"local"}
ret void
@@ -149,9 +149,9 @@ define amdgpu_kernel void @workgroup_release_fence() {
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: workgroup_release_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: workgroup_release_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup") release, !mmra !{!"amdgpu-synchronize-as", !"local"}
ret void
@@ -219,9 +219,9 @@ define amdgpu_kernel void @workgroup_acq_rel_fence() {
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: workgroup_acq_rel_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: workgroup_acq_rel_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup") acq_rel, !mmra !{!"amdgpu-synchronize-as", !"local"}
ret void
@@ -289,9 +289,9 @@ define amdgpu_kernel void @workgroup_seq_cst_fence() {
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: workgroup_seq_cst_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: workgroup_seq_cst_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup") seq_cst, !mmra !{!"amdgpu-synchronize-as", !"local"}
ret void
@@ -350,9 +350,9 @@ define amdgpu_kernel void @workgroup_one_as_acquire_fence() {
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: workgroup_one_as_acquire_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: workgroup_one_as_acquire_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup-one-as") acquire, !mmra !{!"amdgpu-synchronize-as", !"local"}
ret void
@@ -411,9 +411,9 @@ define amdgpu_kernel void @workgroup_one_as_release_fence() {
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: workgroup_one_as_release_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: workgroup_one_as_release_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup-one-as") release, !mmra !{!"amdgpu-synchronize-as", !"local"}
ret void
@@ -472,9 +472,9 @@ define amdgpu_kernel void @workgroup_one_as_acq_rel_fence() {
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: workgroup_one_as_acq_rel_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: workgroup_one_as_acq_rel_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup-one-as") acq_rel, !mmra !{!"amdgpu-synchronize-as", !"local"}
ret void
@@ -533,9 +533,9 @@ define amdgpu_kernel void @workgroup_one_as_seq_cst_fence() {
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: workgroup_one_as_seq_cst_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: workgroup_one_as_seq_cst_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup-one-as") seq_cst, !mmra !{!"amdgpu-synchronize-as", !"local"}
ret void
@@ -605,10 +605,10 @@ define amdgpu_kernel void @agent_acquire_fence() {
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: agent_acquire_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: agent_acquire_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("agent") acquire, !mmra !{!"amdgpu-synchronize-as", !"local"}
ret void
@@ -676,9 +676,9 @@ define amdgpu_kernel void @agent_release_fence() {
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: agent_release_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: agent_release_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("agent") release, !mmra !{!"amdgpu-synchronize-as", !"local"}
ret void
@@ -746,9 +746,9 @@ define amdgpu_kernel void @agent_acq_rel_fence() {
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: agent_acq_rel_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: agent_acq_rel_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("agent") acq_rel, !mmra !{!"amdgpu-synchronize-as", !"local"}
ret void
@@ -816,9 +816,9 @@ define amdgpu_kernel void @agent_seq_cst_fence() {
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: agent_seq_cst_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: agent_seq_cst_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("agent") seq_cst, !mmra !{!"amdgpu-synchronize-as", !"local"}
ret void
@@ -877,9 +877,9 @@ define amdgpu_kernel void @agent_one_as_acquire_fence() {
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: agent_one_as_acquire_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: agent_one_as_acquire_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("agent-one-as") acquire, !mmra !{!"amdgpu-synchronize-as", !"local"}
ret void
@@ -938,9 +938,9 @@ define amdgpu_kernel void @agent_one_as_release_fence() {
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: agent_one_as_release_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: agent_one_as_release_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("agent-one-as") release, !mmra !{!"amdgpu-synchronize-as", !"local"}
ret void
@@ -999,9 +999,9 @@ define amdgpu_kernel void @agent_one_as_acq_rel_fence() {
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: agent_one_as_acq_rel_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: agent_one_as_acq_rel_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("agent-one-as") acq_rel, !mmra !{!"amdgpu-synchronize-as", !"local"}
ret void
@@ -1060,9 +1060,9 @@ define amdgpu_kernel void @agent_one_as_seq_cst_fence() {
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: agent_one_as_seq_cst_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: agent_one_as_seq_cst_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("agent-one-as") seq_cst, !mmra !{!"amdgpu-synchronize-as", !"local"}
ret void
@@ -1132,10 +1132,10 @@ define amdgpu_kernel void @system_acquire_fence() {
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: system_acquire_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: system_acquire_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_endpgm
entry:
fence acquire, !mmra !{!"amdgpu-synchronize-as", !"local"}
ret void
@@ -1203,9 +1203,9 @@ define amdgpu_kernel void @system_release_fence() {
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: system_release_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: system_release_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_endpgm
entry:
fence release, !mmra !{!"amdgpu-synchronize-as", !"local"}
ret void
@@ -1273,9 +1273,9 @@ define amdgpu_kernel void @system_acq_rel_fence() {
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: system_acq_rel_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: system_acq_rel_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_endpgm
entry:
fence acq_rel, !mmra !{!"amdgpu-synchronize-as", !"local"}
ret void
@@ -1343,9 +1343,9 @@ define amdgpu_kernel void @system_seq_cst_fence() {
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: system_seq_cst_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: system_seq_cst_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_endpgm
entry:
fence seq_cst, !mmra !{!"amdgpu-synchronize-as", !"local"}
ret void
@@ -1404,9 +1404,9 @@ define amdgpu_kernel void @system_one_as_acquire_fence() {
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: system_one_as_acquire_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: system_one_as_acquire_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("one-as") acquire, !mmra !{!"amdgpu-synchronize-as", !"local"}
ret void
@@ -1465,9 +1465,9 @@ define amdgpu_kernel void @system_one_as_release_fence() {
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: system_one_as_release_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: system_one_as_release_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("one-as") release, !mmra !{!"amdgpu-synchronize-as", !"local"}
ret void
@@ -1526,9 +1526,9 @@ define amdgpu_kernel void @system_one_as_acq_rel_fence() {
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: system_one_as_acq_rel_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: system_one_as_acq_rel_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("one-as") acq_rel, !mmra !{!"amdgpu-synchronize-as", !"local"}
ret void
@@ -1587,9 +1587,9 @@ define amdgpu_kernel void @system_one_as_seq_cst_fence() {
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: system_one_as_seq_cst_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: system_one_as_seq_cst_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("one-as") seq_cst, !mmra !{!"amdgpu-synchronize-as", !"local"}
ret void
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll
index 8d7194b834385..b3f6533d43887 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll
@@ -12,7 +12,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250-CU %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250 %s
define amdgpu_kernel void @singlethread_acquire_fence() {
; GFX6-LABEL: singlethread_acquire_fence:
@@ -67,9 +67,9 @@ define amdgpu_kernel void @singlethread_acquire_fence() {
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: singlethread_acquire_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: singlethread_acquire_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("singlethread") acquire
ret void
@@ -128,9 +128,9 @@ define amdgpu_kernel void @singlethread_release_fence() {
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: singlethread_release_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: singlethread_release_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("singlethread") release
ret void
@@ -189,9 +189,9 @@ define amdgpu_kernel void @singlethread_acq_rel_fence() {
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: singlethread_acq_rel_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: singlethread_acq_rel_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("singlethread") acq_rel
ret void
@@ -250,9 +250,9 @@ define amdgpu_kernel void @singlethread_seq_cst_fence() {
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: singlethread_seq_cst_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: singlethread_seq_cst_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("singlethread") seq_cst
ret void
@@ -311,9 +311,9 @@ define amdgpu_kernel void @singlethread_one_as_acquire_fence() {
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: singlethread_one_as_acquire_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: singlethread_one_as_acquire_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("singlethread-one-as") acquire
ret void
@@ -372,9 +372,9 @@ define amdgpu_kernel void @singlethread_one_as_release_fence() {
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: singlethread_one_as_release_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: singlethread_one_as_release_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("singlethread-one-as") release
ret void
@@ -433,9 +433,9 @@ define amdgpu_kernel void @singlethread_one_as_acq_rel_fence() {
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: singlethread_one_as_acq_rel_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: singlethread_one_as_acq_rel_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("singlethread-one-as") acq_rel
ret void
@@ -494,9 +494,9 @@ define amdgpu_kernel void @singlethread_one_as_seq_cst_fence() {
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: singlethread_one_as_seq_cst_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: singlethread_one_as_seq_cst_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("singlethread-one-as") seq_cst
ret void
@@ -555,9 +555,9 @@ define amdgpu_kernel void @wavefront_acquire_fence() {
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: wavefront_acquire_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: wavefront_acquire_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("wavefront") acquire
ret void
@@ -616,9 +616,9 @@ define amdgpu_kernel void @wavefront_release_fence() {
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: wavefront_release_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: wavefront_release_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("wavefront") release
ret void
@@ -677,9 +677,9 @@ define amdgpu_kernel void @wavefront_acq_rel_fence() {
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: wavefront_acq_rel_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: wavefront_acq_rel_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("wavefront") acq_rel
ret void
@@ -738,9 +738,9 @@ define amdgpu_kernel void @wavefront_seq_cst_fence() {
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: wavefront_seq_cst_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: wavefront_seq_cst_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("wavefront") seq_cst
ret void
@@ -799,9 +799,9 @@ define amdgpu_kernel void @wavefront_one_as_acquire_fence() {
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: wavefront_one_as_acquire_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: wavefront_one_as_acquire_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("wavefront-one-as") acquire
ret void
@@ -860,9 +860,9 @@ define amdgpu_kernel void @wavefront_one_as_release_fence() {
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: wavefront_one_as_release_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: wavefront_one_as_release_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("wavefront-one-as") release
ret void
@@ -921,9 +921,9 @@ define amdgpu_kernel void @wavefront_one_as_acq_rel_fence() {
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: wavefront_one_as_acq_rel_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: wavefront_one_as_acq_rel_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("wavefront-one-as") acq_rel
ret void
@@ -982,9 +982,9 @@ define amdgpu_kernel void @wavefront_one_as_seq_cst_fence() {
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: wavefront_one_as_seq_cst_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: wavefront_one_as_seq_cst_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("wavefront-one-as") seq_cst
ret void
@@ -1064,10 +1064,10 @@ define amdgpu_kernel void @workgroup_acquire_fence() {
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: workgroup_acquire_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: workgroup_acquire_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup") acquire
ret void
@@ -1144,10 +1144,10 @@ define amdgpu_kernel void @workgroup_release_fence() {
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: workgroup_release_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: workgroup_release_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup") release
ret void
@@ -1229,10 +1229,10 @@ define amdgpu_kernel void @workgroup_acq_rel_fence() {
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: workgroup_acq_rel_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: workgroup_acq_rel_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup") acq_rel
ret void
@@ -1314,10 +1314,10 @@ define amdgpu_kernel void @workgroup_seq_cst_fence() {
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: workgroup_seq_cst_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: workgroup_seq_cst_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup") seq_cst
ret void
@@ -1389,9 +1389,9 @@ define amdgpu_kernel void @workgroup_one_as_acquire_fence() {
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: workgroup_one_as_acquire_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: workgroup_one_as_acquire_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup-one-as") acquire
ret void
@@ -1460,9 +1460,9 @@ define amdgpu_kernel void @workgroup_one_as_release_fence() {
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: workgroup_one_as_release_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: workgroup_one_as_release_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup-one-as") release
ret void
@@ -1536,9 +1536,9 @@ define amdgpu_kernel void @workgroup_one_as_acq_rel_fence() {
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: workgroup_one_as_acq_rel_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: workgroup_one_as_acq_rel_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup-one-as") acq_rel
ret void
@@ -1612,9 +1612,9 @@ define amdgpu_kernel void @workgroup_one_as_seq_cst_fence() {
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: workgroup_one_as_seq_cst_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: workgroup_one_as_seq_cst_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup-one-as") seq_cst
ret void
@@ -1708,12 +1708,12 @@ define amdgpu_kernel void @agent_acquire_fence() {
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: agent_acquire_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: agent_acquire_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("agent") acquire
ret void
@@ -1797,13 +1797,13 @@ define amdgpu_kernel void @agent_release_fence() {
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: agent_release_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: agent_release_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("agent") release
ret void
@@ -1903,14 +1903,14 @@ define amdgpu_kernel void @agent_acq_rel_fence() {
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: agent_acq_rel_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: agent_acq_rel_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("agent") acq_rel
ret void
@@ -2010,14 +2010,14 @@ define amdgpu_kernel void @agent_seq_cst_fence() {
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: agent_seq_cst_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: agent_seq_cst_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("agent") seq_cst
ret void
@@ -2111,12 +2111,12 @@ define amdgpu_kernel void @agent_one_as_acquire_fence() {
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: agent_one_as_acquire_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: agent_one_as_acquire_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("agent-one-as") acquire
ret void
@@ -2200,13 +2200,13 @@ define amdgpu_kernel void @agent_one_as_release_fence() {
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: agent_one_as_release_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: agent_one_as_release_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("agent-one-as") release
ret void
@@ -2306,14 +2306,14 @@ define amdgpu_kernel void @agent_one_as_acq_rel_fence() {
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: agent_one_as_acq_rel_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: agent_one_as_acq_rel_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("agent-one-as") acq_rel
ret void
@@ -2413,14 +2413,14 @@ define amdgpu_kernel void @agent_one_as_seq_cst_fence() {
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: agent_one_as_seq_cst_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: agent_one_as_seq_cst_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("agent-one-as") seq_cst
ret void
@@ -2516,12 +2516,12 @@ define amdgpu_kernel void @system_acquire_fence() {
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: system_acquire_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: system_acquire_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
entry:
fence acquire
ret void
@@ -2609,14 +2609,14 @@ define amdgpu_kernel void @system_release_fence() {
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: system_release_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: system_release_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_endpgm
entry:
fence release
ret void
@@ -2722,15 +2722,15 @@ define amdgpu_kernel void @system_acq_rel_fence() {
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: system_acq_rel_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: system_acq_rel_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
entry:
fence acq_rel
ret void
@@ -2836,15 +2836,15 @@ define amdgpu_kernel void @system_seq_cst_fence() {
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: system_seq_cst_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: system_seq_cst_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
entry:
fence seq_cst
ret void
@@ -2940,12 +2940,12 @@ define amdgpu_kernel void @system_one_as_acquire_fence() {
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: system_one_as_acquire_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: system_one_as_acquire_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("one-as") acquire
ret void
@@ -3033,14 +3033,14 @@ define amdgpu_kernel void @system_one_as_release_fence() {
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: system_one_as_release_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: system_one_as_release_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("one-as") release
ret void
@@ -3146,15 +3146,15 @@ define amdgpu_kernel void @system_one_as_acq_rel_fence() {
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: system_one_as_acq_rel_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: system_one_as_acq_rel_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("one-as") acq_rel
ret void
@@ -3260,15 +3260,15 @@ define amdgpu_kernel void @system_one_as_seq_cst_fence() {
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: system_one_as_seq_cst_fence:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: system_one_as_seq_cst_fence:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("one-as") seq_cst
ret void
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll
index 05b599c6bc1c7..36adbc0011118 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll
@@ -11,7 +11,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250-CU %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250 %s
define amdgpu_kernel void @flat_agent_unordered_load(
; GFX7-LABEL: flat_agent_unordered_load:
@@ -189,16 +189,16 @@ define amdgpu_kernel void @flat_agent_unordered_load(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_unordered_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3]
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_unordered_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3]
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in syncscope("agent") unordered, align 4
@@ -382,16 +382,16 @@ define amdgpu_kernel void @flat_agent_monotonic_load(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_monotonic_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_monotonic_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in syncscope("agent") monotonic, align 4
@@ -590,17 +590,17 @@ define amdgpu_kernel void @flat_agent_acquire_load(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_acquire_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_acquire_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in syncscope("agent") acquire, align 4
@@ -825,23 +825,23 @@ define amdgpu_kernel void @flat_agent_seq_cst_load(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_seq_cst_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_seq_cst_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in syncscope("agent") seq_cst, align 4
@@ -993,15 +993,15 @@ define amdgpu_kernel void @flat_agent_unordered_store(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_unordered_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_unordered_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out syncscope("agent") unordered, align 4
@@ -1152,15 +1152,15 @@ define amdgpu_kernel void @flat_agent_monotonic_store(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_monotonic_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_monotonic_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out syncscope("agent") monotonic, align 4
@@ -1335,19 +1335,19 @@ define amdgpu_kernel void @flat_agent_release_store(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_release_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_release_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out syncscope("agent") release, align 4
@@ -1522,19 +1522,19 @@ define amdgpu_kernel void @flat_agent_seq_cst_store(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_seq_cst_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_seq_cst_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out syncscope("agent") seq_cst, align 4
@@ -1685,15 +1685,15 @@ define amdgpu_kernel void @flat_agent_monotonic_atomicrmw(
; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_monotonic_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_monotonic_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("agent") monotonic
@@ -1875,17 +1875,17 @@ define amdgpu_kernel void @flat_agent_acquire_atomicrmw(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_acquire_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_storecnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_acquire_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("agent") acquire
@@ -2060,19 +2060,19 @@ define amdgpu_kernel void @flat_agent_release_atomicrmw(
; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_release_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_release_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("agent") release
@@ -2278,21 +2278,21 @@ define amdgpu_kernel void @flat_agent_acq_rel_atomicrmw(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_acq_rel_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_storecnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_acq_rel_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("agent") acq_rel
@@ -2498,21 +2498,21 @@ define amdgpu_kernel void @flat_agent_seq_cst_atomicrmw(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_seq_cst_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_storecnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_seq_cst_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("agent") seq_cst
@@ -2722,18 +2722,18 @@ define amdgpu_kernel void @flat_agent_acquire_ret_atomicrmw(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_acquire_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_acquire_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("agent") acquire
@@ -2972,24 +2972,24 @@ define amdgpu_kernel void @flat_agent_acq_rel_ret_atomicrmw(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_acq_rel_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_acq_rel_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("agent") acq_rel
@@ -3228,24 +3228,24 @@ define amdgpu_kernel void @flat_agent_seq_cst_ret_atomicrmw(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_seq_cst_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_seq_cst_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("agent") seq_cst
@@ -3485,19 +3485,19 @@ define amdgpu_kernel void @flat_agent_monotonic_monotonic_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_monotonic_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_monotonic_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -3768,21 +3768,21 @@ define amdgpu_kernel void @flat_agent_acquire_monotonic_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_acquire_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_storecnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_acquire_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -4046,23 +4046,23 @@ define amdgpu_kernel void @flat_agent_release_monotonic_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_release_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_release_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -4357,25 +4357,25 @@ define amdgpu_kernel void @flat_agent_acq_rel_monotonic_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_acq_rel_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_storecnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_acq_rel_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -4670,25 +4670,25 @@ define amdgpu_kernel void @flat_agent_seq_cst_monotonic_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_seq_cst_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_storecnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_seq_cst_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -4959,21 +4959,21 @@ define amdgpu_kernel void @flat_agent_monotonic_acquire_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_monotonic_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_storecnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_monotonic_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -5244,21 +5244,21 @@ define amdgpu_kernel void @flat_agent_acquire_acquire_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_acquire_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_storecnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_acquire_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -5553,25 +5553,25 @@ define amdgpu_kernel void @flat_agent_release_acquire_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_release_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_storecnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_release_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -5866,25 +5866,25 @@ define amdgpu_kernel void @flat_agent_acq_rel_acquire_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_acq_rel_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_storecnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_acq_rel_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -6179,25 +6179,25 @@ define amdgpu_kernel void @flat_agent_seq_cst_acquire_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_seq_cst_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_storecnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_seq_cst_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -6492,25 +6492,25 @@ define amdgpu_kernel void @flat_agent_monotonic_seq_cst_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_monotonic_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_storecnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_monotonic_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -6805,25 +6805,25 @@ define amdgpu_kernel void @flat_agent_acquire_seq_cst_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_acquire_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_storecnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_acquire_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -7118,25 +7118,25 @@ define amdgpu_kernel void @flat_agent_release_seq_cst_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_release_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_storecnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_release_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -7431,25 +7431,25 @@ define amdgpu_kernel void @flat_agent_acq_rel_seq_cst_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_acq_rel_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_storecnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_acq_rel_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -7744,25 +7744,25 @@ define amdgpu_kernel void @flat_agent_seq_cst_seq_cst_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_seq_cst_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_storecnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_seq_cst_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -8046,21 +8046,21 @@ define amdgpu_kernel void @flat_agent_monotonic_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_monotonic_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_monotonic_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -8361,22 +8361,22 @@ define amdgpu_kernel void @flat_agent_acquire_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_acquire_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_acquire_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -8686,25 +8686,25 @@ define amdgpu_kernel void @flat_agent_release_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_release_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_release_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -9033,28 +9033,28 @@ define amdgpu_kernel void @flat_agent_acq_rel_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_acq_rel_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_acq_rel_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -9383,28 +9383,28 @@ define amdgpu_kernel void @flat_agent_seq_cst_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_seq_cst_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_seq_cst_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -9709,24 +9709,24 @@ define amdgpu_kernel void @flat_agent_monotonic_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_monotonic_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_monotonic_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -10027,22 +10027,22 @@ define amdgpu_kernel void @flat_agent_acquire_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_acquire_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_acquire_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -10371,28 +10371,28 @@ define amdgpu_kernel void @flat_agent_release_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_release_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_release_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -10721,28 +10721,28 @@ define amdgpu_kernel void @flat_agent_acq_rel_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_acq_rel_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_acq_rel_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -11071,28 +11071,28 @@ define amdgpu_kernel void @flat_agent_seq_cst_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_seq_cst_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_seq_cst_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -11421,28 +11421,28 @@ define amdgpu_kernel void @flat_agent_monotonic_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_monotonic_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_monotonic_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -11767,26 +11767,26 @@ define amdgpu_kernel void @flat_agent_acquire_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_acquire_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_acquire_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -12115,28 +12115,28 @@ define amdgpu_kernel void @flat_agent_release_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_release_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_release_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -12465,28 +12465,28 @@ define amdgpu_kernel void @flat_agent_acq_rel_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_acq_rel_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_acq_rel_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -12815,28 +12815,28 @@ define amdgpu_kernel void @flat_agent_seq_cst_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_seq_cst_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_seq_cst_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -13022,16 +13022,16 @@ define amdgpu_kernel void @flat_agent_one_as_unordered_load(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_one_as_unordered_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3]
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_one_as_unordered_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3]
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in syncscope("agent-one-as") unordered, align 4
@@ -13215,16 +13215,16 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_load(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_one_as_monotonic_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_one_as_monotonic_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in syncscope("agent-one-as") monotonic, align 4
@@ -13433,18 +13433,18 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_load(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_one_as_acquire_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_one_as_acquire_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in syncscope("agent-one-as") acquire, align 4
@@ -13679,24 +13679,24 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_load(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_one_as_seq_cst_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_one_as_seq_cst_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in syncscope("agent-one-as") seq_cst, align 4
@@ -13848,15 +13848,15 @@ define amdgpu_kernel void @flat_agent_one_as_unordered_store(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_one_as_unordered_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_one_as_unordered_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out syncscope("agent-one-as") unordered, align 4
@@ -14007,15 +14007,15 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_store(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_one_as_monotonic_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_one_as_monotonic_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out syncscope("agent-one-as") monotonic, align 4
@@ -14190,19 +14190,19 @@ define amdgpu_kernel void @flat_agent_one_as_release_store(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_one_as_release_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_one_as_release_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out syncscope("agent-one-as") release, align 4
@@ -14377,19 +14377,19 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_store(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_one_as_seq_cst_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_one_as_seq_cst_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out syncscope("agent-one-as") seq_cst, align 4
@@ -14540,15 +14540,15 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_atomicrmw(
; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_one_as_monotonic_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_one_as_monotonic_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("agent-one-as") monotonic
@@ -14726,17 +14726,17 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_atomicrmw(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_one_as_acquire_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_one_as_acquire_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("agent-one-as") acquire
@@ -14911,19 +14911,19 @@ define amdgpu_kernel void @flat_agent_one_as_release_atomicrmw(
; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_one_as_release_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_one_as_release_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("agent-one-as") release
@@ -15125,21 +15125,21 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_atomicrmw(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_one_as_acq_rel_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_one_as_acq_rel_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("agent-one-as") acq_rel
@@ -15341,21 +15341,21 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_atomicrmw(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_one_as_seq_cst_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_one_as_seq_cst_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("agent-one-as") seq_cst
@@ -15575,19 +15575,19 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_ret_atomicrmw(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_one_as_acquire_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_one_as_acquire_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("agent-one-as") acquire
@@ -15836,25 +15836,25 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_ret_atomicrmw(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_one_as_acq_rel_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_one_as_acq_rel_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("agent-one-as") acq_rel
@@ -16103,25 +16103,25 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_ret_atomicrmw(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_one_as_seq_cst_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_one_as_seq_cst_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("agent-one-as") seq_cst
@@ -16361,19 +16361,19 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_monotonic_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_one_as_monotonic_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_one_as_monotonic_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -16640,21 +16640,21 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_monotonic_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_one_as_acquire_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_one_as_acquire_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -16918,23 +16918,23 @@ define amdgpu_kernel void @flat_agent_one_as_release_monotonic_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_one_as_release_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_one_as_release_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -17225,25 +17225,25 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_monotonic_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_one_as_acq_rel_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_one_as_acq_rel_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -17534,25 +17534,25 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_monotonic_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_one_as_seq_cst_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_one_as_seq_cst_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -17819,21 +17819,21 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_acquire_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_one_as_monotonic_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_one_as_monotonic_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -18100,21 +18100,21 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_acquire_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_one_as_acquire_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_one_as_acquire_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -18405,25 +18405,25 @@ define amdgpu_kernel void @flat_agent_one_as_release_acquire_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_one_as_release_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_one_as_release_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -18714,25 +18714,25 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_acquire_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_one_as_acq_rel_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_one_as_acq_rel_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -19023,25 +19023,25 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_acquire_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_one_as_seq_cst_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_one_as_seq_cst_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -19332,25 +19332,25 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_seq_cst_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_one_as_monotonic_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_one_as_monotonic_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -19641,25 +19641,25 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_seq_cst_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_one_as_acquire_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_one_as_acquire_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -19950,25 +19950,25 @@ define amdgpu_kernel void @flat_agent_one_as_release_seq_cst_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_one_as_release_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_one_as_release_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -20259,25 +20259,25 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_seq_cst_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_one_as_acq_rel_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_one_as_acq_rel_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -20568,25 +20568,25 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_seq_cst_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_one_as_seq_cst_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_one_as_seq_cst_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -20870,21 +20870,21 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_one_as_monotonic_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_one_as_monotonic_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -21195,23 +21195,23 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_one_as_acquire_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_one_as_acquire_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -21521,25 +21521,25 @@ define amdgpu_kernel void @flat_agent_one_as_release_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_one_as_release_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_one_as_release_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -21878,29 +21878,29 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_one_as_acq_rel_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_one_as_acq_rel_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -22239,29 +22239,29 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_one_as_seq_cst_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_one_as_seq_cst_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -22576,25 +22576,25 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_one_as_monotonic_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_one_as_monotonic_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -22905,23 +22905,23 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_one_as_acquire_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_one_as_acquire_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -23260,29 +23260,29 @@ define amdgpu_kernel void @flat_agent_one_as_release_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_one_as_release_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_one_as_release_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -23621,29 +23621,29 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_one_as_acq_rel_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_one_as_acq_rel_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -23982,29 +23982,29 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_one_as_seq_cst_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_one_as_seq_cst_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -24343,29 +24343,29 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_one_as_monotonic_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_one_as_monotonic_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -24700,27 +24700,27 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_one_as_acquire_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_one_as_acquire_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -25059,29 +25059,29 @@ define amdgpu_kernel void @flat_agent_one_as_release_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_one_as_release_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_one_as_release_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -25420,29 +25420,29 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_one_as_acq_rel_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_one_as_acq_rel_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -25781,29 +25781,29 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_agent_one_as_seq_cst_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_agent_one_as_seq_cst_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-lastuse.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-lastuse.ll
index 0b5e6e9da7418..8d98f532908fe 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-lastuse.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-lastuse.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefix=GFX12 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefix=GFX12 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250-CU %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250 %s
define amdgpu_kernel void @flat_last_use_load_0(ptr %in, ptr %out) {
; GFX12-LABEL: flat_last_use_load_0:
@@ -18,16 +18,16 @@ define amdgpu_kernel void @flat_last_use_load_0(ptr %in, ptr %out) {
; GFX12-NEXT: flat_store_b32 v[0:1], v2
; GFX12-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_last_use_load_0:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3] th:TH_LOAD_LU
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_last_use_load_0:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] th:TH_LOAD_LU
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
entry:
%val = load i32, ptr %in, align 4, !amdgpu.last.use !{}
store i32 %val, ptr %out
@@ -68,20 +68,20 @@ define amdgpu_kernel void @flat_last_use_load_1(ptr %in, ptr %out) {
; GFX12-NEXT: flat_store_b32 v[0:1], v2
; GFX12-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_last_use_load_1:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, v0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_xcnt 0x0
-; GFX1250-CU-NEXT: s_mov_b32 s4, 0x3ff
-; GFX1250-CU-NEXT: v_and_b32_e64 v1, v1, s4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: flat_load_b32 v1, v1, s[2:3] scale_offset th:TH_LOAD_LU
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_last_use_load_1:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v1, v0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_mov_b32 s4, 0x3ff
+; GFX1250-NEXT: v_and_b32_e64 v1, v1, s4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: flat_load_b32 v1, v1, s[2:3] scale_offset th:TH_LOAD_LU
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%val.gep = getelementptr inbounds i32, ptr %in, i32 %tid
@@ -108,18 +108,18 @@ define amdgpu_kernel void @flat_last_use_and_volatile_load(ptr %in, ptr %out) {
; GFX12-NEXT: flat_store_b32 v[0:1], v2
; GFX12-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_last_use_and_volatile_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3] th:TH_LOAD_BYPASS scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_last_use_and_volatile_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] th:TH_LOAD_BYPASS scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
entry:
%val = load volatile i32, ptr %in, align 4, !amdgpu.last.use !{}
store i32 %val, ptr %out
@@ -141,16 +141,16 @@ define amdgpu_kernel void @flat_last_use_and_nontemporal_load(ptr %in, ptr %out)
; GFX12-NEXT: flat_store_b32 v[0:1], v2
; GFX12-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_last_use_and_nontemporal_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3] th:TH_LOAD_LU
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_last_use_and_nontemporal_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] th:TH_LOAD_LU
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
entry:
%val = load i32, ptr %in, align 4, !amdgpu.last.use !{}, !nontemporal !0
store i32 %val, ptr %out
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-nontemporal.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-nontemporal.ll
index 58f33dfed87d6..af48eaf8fcda6 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-nontemporal.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-nontemporal.ll
@@ -11,7 +11,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250-CU %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250 %s
define amdgpu_kernel void @flat_nontemporal_load_0(
; GFX7-LABEL: flat_nontemporal_load_0:
@@ -189,16 +189,16 @@ define amdgpu_kernel void @flat_nontemporal_load_0(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_nontemporal_load_0:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3] th:TH_LOAD_NT
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_nontemporal_load_0:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] th:TH_LOAD_NT
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load i32, ptr %in, align 4, !nontemporal !0
@@ -568,20 +568,20 @@ define amdgpu_kernel void @flat_nontemporal_load_1(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_nontemporal_load_1:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, v0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_xcnt 0x0
-; GFX1250-CU-NEXT: s_mov_b32 s4, 0x3ff
-; GFX1250-CU-NEXT: v_and_b32_e64 v1, v1, s4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: flat_load_b32 v1, v1, s[2:3] scale_offset th:TH_LOAD_NT
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_nontemporal_load_1:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v1, v0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_mov_b32 s4, 0x3ff
+; GFX1250-NEXT: v_and_b32_e64 v1, v1, s4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: flat_load_b32 v1, v1, s[2:3] scale_offset th:TH_LOAD_NT
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -767,16 +767,16 @@ define amdgpu_kernel void @flat_nontemporal_store_0(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 th:TH_STORE_NT
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_nontemporal_store_0:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3]
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] th:TH_STORE_NT scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_nontemporal_store_0:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3]
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] th:TH_STORE_NT scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load i32, ptr %in, align 4
@@ -1134,19 +1134,19 @@ define amdgpu_kernel void @flat_nontemporal_store_1(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 th:TH_STORE_NT
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_nontemporal_store_1:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: flat_load_b32 v1, v1, s[2:3]
-; GFX1250-CU-NEXT: s_wait_xcnt 0x0
-; GFX1250-CU-NEXT: s_mov_b32 s2, 0x3ff
-; GFX1250-CU-NEXT: v_and_b32_e64 v0, v0, s2
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scale_offset th:TH_STORE_NT scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_nontemporal_store_1:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v1, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: flat_load_b32 v1, v1, s[2:3]
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_mov_b32 s2, 0x3ff
+; GFX1250-NEXT: v_and_b32_e64 v0, v0, s2
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scale_offset th:TH_STORE_NT scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -1346,18 +1346,18 @@ define amdgpu_kernel void @flat_nontemporal_volatile_load(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_nontemporal_volatile_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3] th:TH_LOAD_NT scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_nontemporal_volatile_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] th:TH_LOAD_NT scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load volatile i32, ptr %in, align 4, !nontemporal !0
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-singlethread.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-singlethread.ll
index ed90e278d1e86..871c941dd6dca 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-singlethread.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-singlethread.ll
@@ -11,7 +11,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250-CU %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250 %s
define amdgpu_kernel void @flat_singlethread_unordered_load(
; GFX7-LABEL: flat_singlethread_unordered_load:
@@ -189,16 +189,16 @@ define amdgpu_kernel void @flat_singlethread_unordered_load(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_unordered_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3]
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_unordered_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3]
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in syncscope("singlethread") unordered, align 4
@@ -382,16 +382,16 @@ define amdgpu_kernel void @flat_singlethread_monotonic_load(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_monotonic_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3]
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_monotonic_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3]
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in syncscope("singlethread") monotonic, align 4
@@ -575,16 +575,16 @@ define amdgpu_kernel void @flat_singlethread_acquire_load(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_acquire_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3]
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_acquire_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3]
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in syncscope("singlethread") acquire, align 4
@@ -768,16 +768,16 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_load(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_seq_cst_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3]
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_seq_cst_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3]
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in syncscope("singlethread") seq_cst, align 4
@@ -929,15 +929,15 @@ define amdgpu_kernel void @flat_singlethread_unordered_store(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_unordered_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_unordered_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out syncscope("singlethread") unordered, align 4
@@ -1088,15 +1088,15 @@ define amdgpu_kernel void @flat_singlethread_monotonic_store(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_monotonic_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_monotonic_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out syncscope("singlethread") monotonic, align 4
@@ -1247,15 +1247,15 @@ define amdgpu_kernel void @flat_singlethread_release_store(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_release_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_release_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out syncscope("singlethread") release, align 4
@@ -1406,15 +1406,15 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_store(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_seq_cst_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_seq_cst_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out syncscope("singlethread") seq_cst, align 4
@@ -1565,15 +1565,15 @@ define amdgpu_kernel void @flat_singlethread_monotonic_atomicrmw(
; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_monotonic_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_monotonic_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("singlethread") monotonic
@@ -1724,15 +1724,15 @@ define amdgpu_kernel void @flat_singlethread_acquire_atomicrmw(
; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_acquire_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_acquire_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("singlethread") acquire
@@ -1883,15 +1883,15 @@ define amdgpu_kernel void @flat_singlethread_release_atomicrmw(
; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_release_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_release_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("singlethread") release
@@ -2042,15 +2042,15 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_atomicrmw(
; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_acq_rel_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_acq_rel_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("singlethread") acq_rel
@@ -2201,15 +2201,15 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_atomicrmw(
; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_seq_cst_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_seq_cst_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("singlethread") seq_cst
@@ -2404,17 +2404,17 @@ define amdgpu_kernel void @flat_singlethread_acquire_ret_atomicrmw(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_acquire_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_acquire_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("singlethread") acquire
@@ -2610,17 +2610,17 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_ret_atomicrmw(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_acq_rel_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_acq_rel_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("singlethread") acq_rel
@@ -2816,17 +2816,17 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_ret_atomicrmw(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_seq_cst_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_seq_cst_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("singlethread") seq_cst
@@ -3066,19 +3066,19 @@ define amdgpu_kernel void @flat_singlethread_monotonic_monotonic_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_monotonic_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_monotonic_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -3318,19 +3318,19 @@ define amdgpu_kernel void @flat_singlethread_acquire_monotonic_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_acquire_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_acquire_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -3570,19 +3570,19 @@ define amdgpu_kernel void @flat_singlethread_release_monotonic_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_release_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_release_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -3822,19 +3822,19 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_monotonic_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_acq_rel_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_acq_rel_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -4074,19 +4074,19 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_monotonic_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_seq_cst_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_seq_cst_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -4326,19 +4326,19 @@ define amdgpu_kernel void @flat_singlethread_monotonic_acquire_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_monotonic_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_monotonic_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -4578,19 +4578,19 @@ define amdgpu_kernel void @flat_singlethread_acquire_acquire_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_acquire_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_acquire_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -4830,19 +4830,19 @@ define amdgpu_kernel void @flat_singlethread_release_acquire_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_release_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_release_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -5082,19 +5082,19 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_acquire_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_acq_rel_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_acq_rel_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -5334,19 +5334,19 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_acquire_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_seq_cst_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_seq_cst_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -5586,19 +5586,19 @@ define amdgpu_kernel void @flat_singlethread_monotonic_seq_cst_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_monotonic_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_monotonic_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -5838,19 +5838,19 @@ define amdgpu_kernel void @flat_singlethread_acquire_seq_cst_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_acquire_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_acquire_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -6090,19 +6090,19 @@ define amdgpu_kernel void @flat_singlethread_release_seq_cst_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_release_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_release_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -6342,19 +6342,19 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_seq_cst_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_acq_rel_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_acq_rel_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -6594,19 +6594,19 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_seq_cst_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_seq_cst_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_seq_cst_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -6890,21 +6890,21 @@ define amdgpu_kernel void @flat_singlethread_monotonic_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_monotonic_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_monotonic_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -7190,21 +7190,21 @@ define amdgpu_kernel void @flat_singlethread_acquire_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_acquire_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_acquire_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -7490,21 +7490,21 @@ define amdgpu_kernel void @flat_singlethread_release_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_release_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_release_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -7790,21 +7790,21 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_acq_rel_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_acq_rel_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -8090,21 +8090,21 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_seq_cst_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_seq_cst_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -8390,21 +8390,21 @@ define amdgpu_kernel void @flat_singlethread_monotonic_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_monotonic_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_monotonic_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -8690,21 +8690,21 @@ define amdgpu_kernel void @flat_singlethread_acquire_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_acquire_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_acquire_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -8990,21 +8990,21 @@ define amdgpu_kernel void @flat_singlethread_release_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_release_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_release_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -9290,21 +9290,21 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_acq_rel_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_acq_rel_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -9590,21 +9590,21 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_seq_cst_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_seq_cst_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -9890,21 +9890,21 @@ define amdgpu_kernel void @flat_singlethread_monotonic_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_monotonic_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_monotonic_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -10190,21 +10190,21 @@ define amdgpu_kernel void @flat_singlethread_acquire_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_acquire_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_acquire_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -10490,21 +10490,21 @@ define amdgpu_kernel void @flat_singlethread_release_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_release_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_release_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -10790,21 +10790,21 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_acq_rel_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_acq_rel_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -11090,21 +11090,21 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_seq_cst_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_seq_cst_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -11290,16 +11290,16 @@ define amdgpu_kernel void @flat_singlethread_one_as_unordered_load(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_one_as_unordered_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3]
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_one_as_unordered_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3]
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in syncscope("singlethread-one-as") unordered, align 4
@@ -11483,16 +11483,16 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_load(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_one_as_monotonic_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3]
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_one_as_monotonic_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3]
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in syncscope("singlethread-one-as") monotonic, align 4
@@ -11676,16 +11676,16 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_load(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_one_as_acquire_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3]
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_one_as_acquire_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3]
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in syncscope("singlethread-one-as") acquire, align 4
@@ -11869,16 +11869,16 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_load(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_one_as_seq_cst_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3]
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_one_as_seq_cst_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3]
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in syncscope("singlethread-one-as") seq_cst, align 4
@@ -12030,15 +12030,15 @@ define amdgpu_kernel void @flat_singlethread_one_as_unordered_store(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_one_as_unordered_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_one_as_unordered_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out syncscope("singlethread-one-as") unordered, align 4
@@ -12189,15 +12189,15 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_store(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_one_as_monotonic_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_one_as_monotonic_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out syncscope("singlethread-one-as") monotonic, align 4
@@ -12348,15 +12348,15 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_store(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_one_as_release_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_one_as_release_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out syncscope("singlethread-one-as") release, align 4
@@ -12507,15 +12507,15 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_store(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_one_as_seq_cst_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_one_as_seq_cst_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out syncscope("singlethread-one-as") seq_cst, align 4
@@ -12666,15 +12666,15 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_atomicrmw(
; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_one_as_monotonic_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_one_as_monotonic_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("singlethread-one-as") monotonic
@@ -12825,15 +12825,15 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_atomicrmw(
; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_one_as_acquire_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_one_as_acquire_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("singlethread-one-as") acquire
@@ -12984,15 +12984,15 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_atomicrmw(
; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_one_as_release_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_one_as_release_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("singlethread-one-as") release
@@ -13143,15 +13143,15 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_atomicrmw(
; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_one_as_acq_rel_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_one_as_acq_rel_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("singlethread-one-as") acq_rel
@@ -13302,15 +13302,15 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_atomicrmw(
; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_one_as_seq_cst_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_one_as_seq_cst_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("singlethread-one-as") seq_cst
@@ -13505,17 +13505,17 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_ret_atomicrmw(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_one_as_acquire_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_one_as_acquire_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("singlethread-one-as") acquire
@@ -13711,17 +13711,17 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_ret_atomicrmw(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_one_as_acq_rel_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_one_as_acq_rel_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("singlethread-one-as") acq_rel
@@ -13917,17 +13917,17 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_ret_atomicrmw(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_one_as_seq_cst_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_one_as_seq_cst_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("singlethread-one-as") seq_cst
@@ -14167,19 +14167,19 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_monotonic_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_one_as_monotonic_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_one_as_monotonic_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -14419,19 +14419,19 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_monotonic_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_one_as_acquire_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_one_as_acquire_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -14671,19 +14671,19 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_monotonic_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_one_as_release_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_one_as_release_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -14923,19 +14923,19 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_monotonic_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_one_as_acq_rel_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_one_as_acq_rel_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -15175,19 +15175,19 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_monotonic_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_one_as_seq_cst_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_one_as_seq_cst_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -15427,19 +15427,19 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_acquire_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_one_as_monotonic_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_one_as_monotonic_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -15679,19 +15679,19 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_acquire_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_one_as_acquire_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_one_as_acquire_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -15931,19 +15931,19 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_acquire_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_one_as_release_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_one_as_release_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -16183,19 +16183,19 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_acquire_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_one_as_acq_rel_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_one_as_acq_rel_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -16435,19 +16435,19 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_acquire_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_one_as_seq_cst_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_one_as_seq_cst_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -16687,19 +16687,19 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_seq_cst_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_one_as_monotonic_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_one_as_monotonic_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -16939,19 +16939,19 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_seq_cst_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_one_as_acquire_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_one_as_acquire_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -17191,19 +17191,19 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_seq_cst_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_one_as_release_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_one_as_release_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -17443,19 +17443,19 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_seq_cst_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_one_as_acq_rel_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_one_as_acq_rel_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -17695,19 +17695,19 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_seq_cst_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_one_as_seq_cst_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_one_as_seq_cst_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -17991,21 +17991,21 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_monotonic_ret_cmpx
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_one_as_monotonic_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_one_as_monotonic_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -18291,21 +18291,21 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_monotonic_ret_cmpxch
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_one_as_acquire_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_one_as_acquire_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -18591,21 +18591,21 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_monotonic_ret_cmpxch
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_one_as_release_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_one_as_release_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -18891,21 +18891,21 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_monotonic_ret_cmpxch
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_one_as_acq_rel_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_one_as_acq_rel_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -19191,21 +19191,21 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_monotonic_ret_cmpxch
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_one_as_seq_cst_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_one_as_seq_cst_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -19491,21 +19491,21 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_acquire_ret_cmpxch
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_one_as_monotonic_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_one_as_monotonic_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -19791,21 +19791,21 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_one_as_acquire_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_one_as_acquire_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -20091,21 +20091,21 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_one_as_release_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_one_as_release_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -20391,21 +20391,21 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_one_as_acq_rel_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_one_as_acq_rel_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -20691,21 +20691,21 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_one_as_seq_cst_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_one_as_seq_cst_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -20991,21 +20991,21 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_seq_cst_ret_cmpxch
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_one_as_monotonic_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_one_as_monotonic_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -21291,21 +21291,21 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_one_as_acquire_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_one_as_acquire_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -21591,21 +21591,21 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_one_as_release_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_one_as_release_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -21891,21 +21891,21 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_one_as_acq_rel_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_one_as_acq_rel_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -22191,21 +22191,21 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_singlethread_one_as_seq_cst_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_singlethread_one_as_seq_cst_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll
index 3c6ffdd1a6332..9d70a2437e553 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll
@@ -11,7 +11,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250-CU %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250 %s
define amdgpu_kernel void @flat_system_unordered_load(
; GFX7-LABEL: flat_system_unordered_load:
@@ -189,16 +189,16 @@ define amdgpu_kernel void @flat_system_unordered_load(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_unordered_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3]
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_unordered_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3]
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in unordered, align 4
@@ -382,16 +382,16 @@ define amdgpu_kernel void @flat_system_monotonic_load(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_monotonic_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_monotonic_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in monotonic, align 4
@@ -592,17 +592,17 @@ define amdgpu_kernel void @flat_system_acquire_load(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_acquire_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_acquire_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in acquire, align 4
@@ -829,23 +829,23 @@ define amdgpu_kernel void @flat_system_seq_cst_load(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_seq_cst_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_seq_cst_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in seq_cst, align 4
@@ -997,15 +997,15 @@ define amdgpu_kernel void @flat_system_unordered_store(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_unordered_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_unordered_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out unordered, align 4
@@ -1156,15 +1156,15 @@ define amdgpu_kernel void @flat_system_monotonic_store(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_monotonic_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_monotonic_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out monotonic, align 4
@@ -1343,20 +1343,20 @@ define amdgpu_kernel void @flat_system_release_store(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_release_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_release_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out release, align 4
@@ -1535,20 +1535,20 @@ define amdgpu_kernel void @flat_system_seq_cst_store(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_seq_cst_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_seq_cst_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out seq_cst, align 4
@@ -1699,15 +1699,15 @@ define amdgpu_kernel void @flat_system_monotonic_atomicrmw(
; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_monotonic_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_monotonic_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in monotonic
@@ -1891,17 +1891,17 @@ define amdgpu_kernel void @flat_system_acquire_atomicrmw(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_acquire_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_storecnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_acquire_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in acquire
@@ -2080,20 +2080,20 @@ define amdgpu_kernel void @flat_system_release_atomicrmw(
; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_release_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_release_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in release
@@ -2305,22 +2305,22 @@ define amdgpu_kernel void @flat_system_acq_rel_atomicrmw(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_acq_rel_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_storecnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_acq_rel_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in acq_rel
@@ -2532,22 +2532,22 @@ define amdgpu_kernel void @flat_system_seq_cst_atomicrmw(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_seq_cst_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_storecnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_seq_cst_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in seq_cst
@@ -2759,18 +2759,18 @@ define amdgpu_kernel void @flat_system_acquire_ret_atomicrmw(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_acquire_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_acquire_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in acquire
@@ -3015,25 +3015,25 @@ define amdgpu_kernel void @flat_system_acq_rel_ret_atomicrmw(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_acq_rel_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_acq_rel_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in acq_rel
@@ -3278,25 +3278,25 @@ define amdgpu_kernel void @flat_system_seq_cst_ret_atomicrmw(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_seq_cst_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_seq_cst_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in seq_cst
@@ -3536,19 +3536,19 @@ define amdgpu_kernel void @flat_system_monotonic_monotonic_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_monotonic_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_monotonic_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -3821,21 +3821,21 @@ define amdgpu_kernel void @flat_system_acquire_monotonic_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_acquire_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_storecnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_acquire_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -4103,24 +4103,24 @@ define amdgpu_kernel void @flat_system_release_monotonic_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_release_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_release_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -4421,26 +4421,26 @@ define amdgpu_kernel void @flat_system_acq_rel_monotonic_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_acq_rel_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_storecnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_acq_rel_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -4741,26 +4741,26 @@ define amdgpu_kernel void @flat_system_seq_cst_monotonic_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_seq_cst_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_storecnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_seq_cst_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -5033,21 +5033,21 @@ define amdgpu_kernel void @flat_system_monotonic_acquire_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_monotonic_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_storecnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_monotonic_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -5320,21 +5320,21 @@ define amdgpu_kernel void @flat_system_acquire_acquire_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_acquire_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_storecnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_acquire_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -5635,26 +5635,26 @@ define amdgpu_kernel void @flat_system_release_acquire_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_release_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_storecnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_release_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -5955,26 +5955,26 @@ define amdgpu_kernel void @flat_system_acq_rel_acquire_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_acq_rel_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_storecnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_acq_rel_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -6275,26 +6275,26 @@ define amdgpu_kernel void @flat_system_seq_cst_acquire_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_seq_cst_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_storecnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_seq_cst_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -6595,26 +6595,26 @@ define amdgpu_kernel void @flat_system_monotonic_seq_cst_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_monotonic_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_storecnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_monotonic_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -6915,26 +6915,26 @@ define amdgpu_kernel void @flat_system_acquire_seq_cst_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_acquire_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_storecnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_acquire_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -7235,26 +7235,26 @@ define amdgpu_kernel void @flat_system_release_seq_cst_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_release_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_storecnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_release_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -7555,26 +7555,26 @@ define amdgpu_kernel void @flat_system_acq_rel_seq_cst_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_acq_rel_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_storecnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_acq_rel_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -7875,26 +7875,26 @@ define amdgpu_kernel void @flat_system_seq_cst_seq_cst_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_seq_cst_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_storecnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_seq_cst_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -8178,21 +8178,21 @@ define amdgpu_kernel void @flat_system_monotonic_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_monotonic_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_monotonic_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -8495,22 +8495,22 @@ define amdgpu_kernel void @flat_system_acquire_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_acquire_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_acquire_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -8824,26 +8824,26 @@ define amdgpu_kernel void @flat_system_release_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_release_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_release_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -9178,29 +9178,29 @@ define amdgpu_kernel void @flat_system_acq_rel_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_acq_rel_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_acq_rel_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -9535,29 +9535,29 @@ define amdgpu_kernel void @flat_system_seq_cst_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_seq_cst_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_seq_cst_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -9864,24 +9864,24 @@ define amdgpu_kernel void @flat_system_monotonic_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_monotonic_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_monotonic_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -10184,22 +10184,22 @@ define amdgpu_kernel void @flat_system_acquire_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_acquire_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_acquire_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -10534,29 +10534,29 @@ define amdgpu_kernel void @flat_system_release_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_release_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_release_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -10891,29 +10891,29 @@ define amdgpu_kernel void @flat_system_acq_rel_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_acq_rel_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_acq_rel_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -11248,29 +11248,29 @@ define amdgpu_kernel void @flat_system_seq_cst_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_seq_cst_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_seq_cst_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -11605,29 +11605,29 @@ define amdgpu_kernel void @flat_system_monotonic_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_monotonic_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_monotonic_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -11958,27 +11958,27 @@ define amdgpu_kernel void @flat_system_acquire_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_acquire_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_acquire_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -12313,29 +12313,29 @@ define amdgpu_kernel void @flat_system_release_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_release_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_release_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -12670,29 +12670,29 @@ define amdgpu_kernel void @flat_system_acq_rel_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_acq_rel_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_acq_rel_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -13027,29 +13027,29 @@ define amdgpu_kernel void @flat_system_seq_cst_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_seq_cst_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_seq_cst_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -13235,16 +13235,16 @@ define amdgpu_kernel void @flat_system_one_as_unordered_load(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_one_as_unordered_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3]
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_one_as_unordered_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3]
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in syncscope("one-as") unordered, align 4
@@ -13428,16 +13428,16 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_load(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_one_as_monotonic_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_one_as_monotonic_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in syncscope("one-as") monotonic, align 4
@@ -13648,18 +13648,18 @@ define amdgpu_kernel void @flat_system_one_as_acquire_load(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_one_as_acquire_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_one_as_acquire_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in syncscope("one-as") acquire, align 4
@@ -13896,24 +13896,24 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_load(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_one_as_seq_cst_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_one_as_seq_cst_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in syncscope("one-as") seq_cst, align 4
@@ -14065,15 +14065,15 @@ define amdgpu_kernel void @flat_system_one_as_unordered_store(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_one_as_unordered_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_one_as_unordered_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out syncscope("one-as") unordered, align 4
@@ -14224,15 +14224,15 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_store(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_one_as_monotonic_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_one_as_monotonic_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out syncscope("one-as") monotonic, align 4
@@ -14411,20 +14411,20 @@ define amdgpu_kernel void @flat_system_one_as_release_store(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_one_as_release_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_one_as_release_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out syncscope("one-as") release, align 4
@@ -14603,20 +14603,20 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_store(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_one_as_seq_cst_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_one_as_seq_cst_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out syncscope("one-as") seq_cst, align 4
@@ -14767,15 +14767,15 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_atomicrmw(
; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_one_as_monotonic_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_one_as_monotonic_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("one-as") monotonic
@@ -14955,17 +14955,17 @@ define amdgpu_kernel void @flat_system_one_as_acquire_atomicrmw(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_one_as_acquire_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_one_as_acquire_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("one-as") acquire
@@ -15144,20 +15144,20 @@ define amdgpu_kernel void @flat_system_one_as_release_atomicrmw(
; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2 scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_one_as_release_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_one_as_release_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("one-as") release
@@ -15365,22 +15365,22 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_atomicrmw(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_one_as_acq_rel_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_one_as_acq_rel_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("one-as") acq_rel
@@ -15588,22 +15588,22 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_atomicrmw(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_one_as_seq_cst_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_one_as_seq_cst_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("one-as") seq_cst
@@ -15825,19 +15825,19 @@ define amdgpu_kernel void @flat_system_one_as_acquire_ret_atomicrmw(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_one_as_acquire_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_one_as_acquire_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("one-as") acquire
@@ -16092,26 +16092,26 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_ret_atomicrmw(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_one_as_acq_rel_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_one_as_acq_rel_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("one-as") acq_rel
@@ -16366,26 +16366,26 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_ret_atomicrmw(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_one_as_seq_cst_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_one_as_seq_cst_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("one-as") seq_cst
@@ -16625,19 +16625,19 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_monotonic_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_one_as_monotonic_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_one_as_monotonic_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -16906,21 +16906,21 @@ define amdgpu_kernel void @flat_system_one_as_acquire_monotonic_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_one_as_acquire_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_one_as_acquire_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -17188,24 +17188,24 @@ define amdgpu_kernel void @flat_system_one_as_release_monotonic_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16 scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_one_as_release_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_one_as_release_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -17502,26 +17502,26 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_monotonic_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_one_as_acq_rel_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_one_as_acq_rel_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -17818,26 +17818,26 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_monotonic_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_one_as_seq_cst_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_one_as_seq_cst_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -18106,21 +18106,21 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_acquire_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_one_as_monotonic_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_one_as_monotonic_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -18389,21 +18389,21 @@ define amdgpu_kernel void @flat_system_one_as_acquire_acquire_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_one_as_acquire_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_one_as_acquire_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -18700,26 +18700,26 @@ define amdgpu_kernel void @flat_system_one_as_release_acquire_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_one_as_release_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_one_as_release_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -19016,26 +19016,26 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_acquire_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_one_as_acq_rel_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_one_as_acq_rel_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -19332,26 +19332,26 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_acquire_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_one_as_seq_cst_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_one_as_seq_cst_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -19648,26 +19648,26 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_seq_cst_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_one_as_monotonic_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_one_as_monotonic_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -19964,26 +19964,26 @@ define amdgpu_kernel void @flat_system_one_as_acquire_seq_cst_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_one_as_acquire_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_one_as_acquire_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -20280,26 +20280,26 @@ define amdgpu_kernel void @flat_system_one_as_release_seq_cst_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_one_as_release_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_one_as_release_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -20596,26 +20596,26 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_seq_cst_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_one_as_acq_rel_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_one_as_acq_rel_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -20912,26 +20912,26 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_seq_cst_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_one_as_seq_cst_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_one_as_seq_cst_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -21215,21 +21215,21 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_one_as_monotonic_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_one_as_monotonic_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -21542,23 +21542,23 @@ define amdgpu_kernel void @flat_system_one_as_acquire_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_one_as_acquire_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_one_as_acquire_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -21872,26 +21872,26 @@ define amdgpu_kernel void @flat_system_one_as_release_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_one_as_release_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_one_as_release_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -22236,30 +22236,30 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_one_as_acq_rel_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_one_as_acq_rel_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -22604,30 +22604,30 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_one_as_seq_cst_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_one_as_seq_cst_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -22944,25 +22944,25 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_one_as_monotonic_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_one_as_monotonic_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -23275,23 +23275,23 @@ define amdgpu_kernel void @flat_system_one_as_acquire_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_one_as_acquire_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_one_as_acquire_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -23636,30 +23636,30 @@ define amdgpu_kernel void @flat_system_one_as_release_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_one_as_release_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_one_as_release_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -24004,30 +24004,30 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_one_as_acq_rel_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_one_as_acq_rel_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -24372,30 +24372,30 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_one_as_seq_cst_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_one_as_seq_cst_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -24740,30 +24740,30 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_one_as_monotonic_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_one_as_monotonic_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -25104,28 +25104,28 @@ define amdgpu_kernel void @flat_system_one_as_acquire_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_one_as_acquire_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_one_as_acquire_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -25470,30 +25470,30 @@ define amdgpu_kernel void @flat_system_one_as_release_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_one_as_release_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_one_as_release_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -25838,30 +25838,30 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_one_as_acq_rel_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_one_as_acq_rel_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -26206,30 +26206,30 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_system_one_as_seq_cst_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_system_one_as_seq_cst_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-volatile.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-volatile.ll
index e0cf88891e421..43f015c3a2e0f 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-volatile.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-volatile.ll
@@ -7,7 +7,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250-CU %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250 %s
define amdgpu_kernel void @flat_nontemporal_load_0(
; GFX7-LABEL: flat_nontemporal_load_0:
@@ -145,18 +145,18 @@ define amdgpu_kernel void @flat_nontemporal_load_0(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_nontemporal_load_0:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_nontemporal_load_0:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load volatile i32, ptr %in, align 4
@@ -430,22 +430,22 @@ define amdgpu_kernel void @flat_nontemporal_load_1(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_nontemporal_load_1:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, v0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_xcnt 0x0
-; GFX1250-CU-NEXT: s_mov_b32 s4, 0x3ff
-; GFX1250-CU-NEXT: v_and_b32_e64 v1, v1, s4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: flat_load_b32 v1, v1, s[2:3] scale_offset scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_nontemporal_load_1:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v1, v0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_mov_b32 s4, 0x3ff
+; GFX1250-NEXT: v_and_b32_e64 v1, v1, s4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: flat_load_b32 v1, v1, s[2:3] scale_offset scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -595,17 +595,17 @@ define amdgpu_kernel void @flat_nontemporal_store_0(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_nontemporal_store_0:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3]
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_nontemporal_store_0:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3]
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load i32, ptr %in, align 4
@@ -875,20 +875,20 @@ define amdgpu_kernel void @flat_nontemporal_store_1(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_nontemporal_store_1:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: flat_load_b32 v1, v1, s[2:3]
-; GFX1250-CU-NEXT: s_wait_xcnt 0x0
-; GFX1250-CU-NEXT: s_mov_b32 s2, 0x3ff
-; GFX1250-CU-NEXT: v_and_b32_e64 v0, v0, s2
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scale_offset scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_nontemporal_store_1:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v1, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: flat_load_b32 v1, v1, s[2:3]
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_mov_b32 s2, 0x3ff
+; GFX1250-NEXT: v_and_b32_e64 v0, v0, s2
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scale_offset scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -1030,16 +1030,16 @@ define amdgpu_kernel void @flat_volatile_workgroup_acquire_load(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_volatile_workgroup_acquire_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3]
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_volatile_workgroup_acquire_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3]
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic volatile i32, ptr %in syncscope("workgroup") acquire, align 4
@@ -1160,16 +1160,16 @@ define amdgpu_kernel void @flat_volatile_workgroup_release_store(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_volatile_workgroup_release_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_volatile_workgroup_release_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic volatile i32 %in, ptr %out syncscope("workgroup") release, align 4
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-wavefront.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-wavefront.ll
index 16135f5fc5d6f..f086542b3d1f8 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-wavefront.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-wavefront.ll
@@ -11,7 +11,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250-CU %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250 %s
define amdgpu_kernel void @flat_wavefront_unordered_load(
; GFX7-LABEL: flat_wavefront_unordered_load:
@@ -189,16 +189,16 @@ define amdgpu_kernel void @flat_wavefront_unordered_load(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_unordered_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3]
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_unordered_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3]
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in syncscope("wavefront") unordered, align 4
@@ -382,16 +382,16 @@ define amdgpu_kernel void @flat_wavefront_monotonic_load(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_monotonic_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3]
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_monotonic_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3]
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in syncscope("wavefront") monotonic, align 4
@@ -575,16 +575,16 @@ define amdgpu_kernel void @flat_wavefront_acquire_load(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_acquire_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3]
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_acquire_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3]
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in syncscope("wavefront") acquire, align 4
@@ -768,16 +768,16 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_load(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_seq_cst_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3]
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_seq_cst_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3]
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in syncscope("wavefront") seq_cst, align 4
@@ -929,15 +929,15 @@ define amdgpu_kernel void @flat_wavefront_unordered_store(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_unordered_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_unordered_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out syncscope("wavefront") unordered, align 4
@@ -1088,15 +1088,15 @@ define amdgpu_kernel void @flat_wavefront_monotonic_store(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_monotonic_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_monotonic_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out syncscope("wavefront") monotonic, align 4
@@ -1247,15 +1247,15 @@ define amdgpu_kernel void @flat_wavefront_release_store(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_release_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_release_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out syncscope("wavefront") release, align 4
@@ -1406,15 +1406,15 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_store(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_seq_cst_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_seq_cst_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out syncscope("wavefront") seq_cst, align 4
@@ -1565,15 +1565,15 @@ define amdgpu_kernel void @flat_wavefront_monotonic_atomicrmw(
; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_monotonic_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_monotonic_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("wavefront") monotonic
@@ -1724,15 +1724,15 @@ define amdgpu_kernel void @flat_wavefront_acquire_atomicrmw(
; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_acquire_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_acquire_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("wavefront") acquire
@@ -1883,15 +1883,15 @@ define amdgpu_kernel void @flat_wavefront_release_atomicrmw(
; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_release_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_release_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("wavefront") release
@@ -2042,15 +2042,15 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_atomicrmw(
; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_acq_rel_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_acq_rel_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("wavefront") acq_rel
@@ -2201,15 +2201,15 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_atomicrmw(
; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_seq_cst_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_seq_cst_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("wavefront") seq_cst
@@ -2404,17 +2404,17 @@ define amdgpu_kernel void @flat_wavefront_acquire_ret_atomicrmw(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_acquire_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_acquire_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("wavefront") acquire
@@ -2610,17 +2610,17 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_ret_atomicrmw(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_acq_rel_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_acq_rel_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("wavefront") acq_rel
@@ -2816,17 +2816,17 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_ret_atomicrmw(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_seq_cst_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_seq_cst_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("wavefront") seq_cst
@@ -3066,19 +3066,19 @@ define amdgpu_kernel void @flat_wavefront_monotonic_monotonic_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_monotonic_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_monotonic_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -3318,19 +3318,19 @@ define amdgpu_kernel void @flat_wavefront_acquire_monotonic_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_acquire_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_acquire_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -3570,19 +3570,19 @@ define amdgpu_kernel void @flat_wavefront_release_monotonic_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_release_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_release_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -3822,19 +3822,19 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_monotonic_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_acq_rel_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_acq_rel_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -4074,19 +4074,19 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_monotonic_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_seq_cst_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_seq_cst_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -4326,19 +4326,19 @@ define amdgpu_kernel void @flat_wavefront_monotonic_acquire_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_monotonic_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_monotonic_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -4578,19 +4578,19 @@ define amdgpu_kernel void @flat_wavefront_acquire_acquire_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_acquire_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_acquire_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -4830,19 +4830,19 @@ define amdgpu_kernel void @flat_wavefront_release_acquire_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_release_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_release_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -5082,19 +5082,19 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_acquire_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_acq_rel_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_acq_rel_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -5334,19 +5334,19 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_acquire_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_seq_cst_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_seq_cst_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -5586,19 +5586,19 @@ define amdgpu_kernel void @flat_wavefront_monotonic_seq_cst_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_monotonic_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_monotonic_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -5838,19 +5838,19 @@ define amdgpu_kernel void @flat_wavefront_acquire_seq_cst_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_acquire_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_acquire_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -6090,19 +6090,19 @@ define amdgpu_kernel void @flat_wavefront_release_seq_cst_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_release_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_release_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -6342,19 +6342,19 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_seq_cst_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_acq_rel_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_acq_rel_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -6594,19 +6594,19 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_seq_cst_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_seq_cst_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_seq_cst_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -6890,21 +6890,21 @@ define amdgpu_kernel void @flat_wavefront_monotonic_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_monotonic_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_monotonic_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -7190,21 +7190,21 @@ define amdgpu_kernel void @flat_wavefront_acquire_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_acquire_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_acquire_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -7490,21 +7490,21 @@ define amdgpu_kernel void @flat_wavefront_release_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_release_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_release_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -7790,21 +7790,21 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_acq_rel_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_acq_rel_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -8090,21 +8090,21 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_seq_cst_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_seq_cst_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -8390,21 +8390,21 @@ define amdgpu_kernel void @flat_wavefront_monotonic_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_monotonic_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_monotonic_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -8690,21 +8690,21 @@ define amdgpu_kernel void @flat_wavefront_acquire_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_acquire_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_acquire_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -8990,21 +8990,21 @@ define amdgpu_kernel void @flat_wavefront_release_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_release_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_release_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -9290,21 +9290,21 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_acq_rel_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_acq_rel_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -9590,21 +9590,21 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_seq_cst_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_seq_cst_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -9890,21 +9890,21 @@ define amdgpu_kernel void @flat_wavefront_monotonic_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_monotonic_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_monotonic_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -10190,21 +10190,21 @@ define amdgpu_kernel void @flat_wavefront_acquire_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_acquire_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_acquire_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -10490,21 +10490,21 @@ define amdgpu_kernel void @flat_wavefront_release_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_release_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_release_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -10790,21 +10790,21 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_acq_rel_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_acq_rel_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -11090,21 +11090,21 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_seq_cst_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_seq_cst_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -11290,16 +11290,16 @@ define amdgpu_kernel void @flat_wavefront_one_as_unordered_load(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_one_as_unordered_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3]
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_one_as_unordered_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3]
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in syncscope("wavefront-one-as") unordered, align 4
@@ -11483,16 +11483,16 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_load(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_one_as_monotonic_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3]
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_one_as_monotonic_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3]
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in syncscope("wavefront-one-as") monotonic, align 4
@@ -11676,16 +11676,16 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_load(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_one_as_acquire_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3]
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_one_as_acquire_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3]
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in syncscope("wavefront-one-as") acquire, align 4
@@ -11869,16 +11869,16 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_load(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_one_as_seq_cst_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3]
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_one_as_seq_cst_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3]
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in syncscope("wavefront-one-as") seq_cst, align 4
@@ -12030,15 +12030,15 @@ define amdgpu_kernel void @flat_wavefront_one_as_unordered_store(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_one_as_unordered_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_one_as_unordered_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out syncscope("wavefront-one-as") unordered, align 4
@@ -12189,15 +12189,15 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_store(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_one_as_monotonic_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_one_as_monotonic_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out syncscope("wavefront-one-as") monotonic, align 4
@@ -12348,15 +12348,15 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_store(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_one_as_release_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_one_as_release_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out syncscope("wavefront-one-as") release, align 4
@@ -12507,15 +12507,15 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_store(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_one_as_seq_cst_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_one_as_seq_cst_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out syncscope("wavefront-one-as") seq_cst, align 4
@@ -12666,15 +12666,15 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_atomicrmw(
; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_one_as_monotonic_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_one_as_monotonic_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("wavefront-one-as") monotonic
@@ -12825,15 +12825,15 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_atomicrmw(
; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_one_as_acquire_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_one_as_acquire_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("wavefront-one-as") acquire
@@ -12984,15 +12984,15 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_atomicrmw(
; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_one_as_release_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_one_as_release_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("wavefront-one-as") release
@@ -13143,15 +13143,15 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_atomicrmw(
; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_one_as_acq_rel_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_one_as_acq_rel_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("wavefront-one-as") acq_rel
@@ -13302,15 +13302,15 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_atomicrmw(
; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_one_as_seq_cst_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_one_as_seq_cst_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("wavefront-one-as") seq_cst
@@ -13505,17 +13505,17 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_ret_atomicrmw(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_one_as_acquire_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_one_as_acquire_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("wavefront-one-as") acquire
@@ -13711,17 +13711,17 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_ret_atomicrmw(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_one_as_acq_rel_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_one_as_acq_rel_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("wavefront-one-as") acq_rel
@@ -13917,17 +13917,17 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_ret_atomicrmw(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_one_as_seq_cst_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_one_as_seq_cst_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("wavefront-one-as") seq_cst
@@ -14167,19 +14167,19 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_monotonic_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_one_as_monotonic_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_one_as_monotonic_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -14419,19 +14419,19 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_monotonic_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_one_as_acquire_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_one_as_acquire_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -14671,19 +14671,19 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_monotonic_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_one_as_release_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_one_as_release_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -14923,19 +14923,19 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_monotonic_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_one_as_acq_rel_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_one_as_acq_rel_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -15175,19 +15175,19 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_monotonic_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_one_as_seq_cst_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_one_as_seq_cst_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -15427,19 +15427,19 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_acquire_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_one_as_monotonic_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_one_as_monotonic_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -15679,19 +15679,19 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_acquire_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_one_as_acquire_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_one_as_acquire_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -15931,19 +15931,19 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_acquire_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_one_as_release_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_one_as_release_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -16183,19 +16183,19 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_acquire_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_one_as_acq_rel_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_one_as_acq_rel_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -16435,19 +16435,19 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_acquire_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_one_as_seq_cst_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_one_as_seq_cst_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -16687,19 +16687,19 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_seq_cst_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_one_as_monotonic_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_one_as_monotonic_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -16939,19 +16939,19 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_seq_cst_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_one_as_acquire_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_one_as_acquire_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -17191,19 +17191,19 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_seq_cst_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_one_as_release_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_one_as_release_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -17443,19 +17443,19 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_seq_cst_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_one_as_acq_rel_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_one_as_acq_rel_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -17695,19 +17695,19 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_seq_cst_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_one_as_seq_cst_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_one_as_seq_cst_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -17991,21 +17991,21 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_monotonic_ret_cmpxchg
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_one_as_monotonic_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_one_as_monotonic_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -18291,21 +18291,21 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_one_as_acquire_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_one_as_acquire_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -18591,21 +18591,21 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -18891,21 +18891,21 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -19191,21 +19191,21 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_one_as_monotonic_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_one_as_monotonic_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -19491,21 +19491,21 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_one_as_acquire_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_one_as_acquire_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -19791,21 +19791,21 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_one_as_release_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_one_as_release_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -20091,21 +20091,21 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_one_as_acq_rel_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_one_as_acq_rel_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -20391,21 +20391,21 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_one_as_seq_cst_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_one_as_seq_cst_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -20691,21 +20691,21 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -20991,21 +20991,21 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_one_as_acquire_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_one_as_acquire_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -21291,21 +21291,21 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_one_as_release_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_one_as_release_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -21591,21 +21591,21 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_relc_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_one_as_acq_relc_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_one_as_acq_relc_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -21891,21 +21891,21 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-workgroup.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-workgroup.ll
index 18e6812ded962..d8e6ad043e061 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-workgroup.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-workgroup.ll
@@ -11,7 +11,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250-CU %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250 %s
define amdgpu_kernel void @flat_workgroup_unordered_load(
; GFX7-LABEL: flat_workgroup_unordered_load:
@@ -189,16 +189,16 @@ define amdgpu_kernel void @flat_workgroup_unordered_load(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_unordered_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3]
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_unordered_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3]
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in syncscope("workgroup") unordered, align 4
@@ -382,16 +382,16 @@ define amdgpu_kernel void @flat_workgroup_monotonic_load(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_monotonic_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3]
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_monotonic_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3]
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in syncscope("workgroup") monotonic, align 4
@@ -587,16 +587,16 @@ define amdgpu_kernel void @flat_workgroup_acquire_load(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_acquire_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3]
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_acquire_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3]
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in syncscope("workgroup") acquire, align 4
@@ -811,17 +811,17 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_load(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_seq_cst_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3]
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_seq_cst_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3]
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in syncscope("workgroup") seq_cst, align 4
@@ -973,15 +973,15 @@ define amdgpu_kernel void @flat_workgroup_unordered_store(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_unordered_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_unordered_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out syncscope("workgroup") unordered, align 4
@@ -1132,15 +1132,15 @@ define amdgpu_kernel void @flat_workgroup_monotonic_store(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_monotonic_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_monotonic_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out syncscope("workgroup") monotonic, align 4
@@ -1308,16 +1308,16 @@ define amdgpu_kernel void @flat_workgroup_release_store(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_release_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_release_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out syncscope("workgroup") release, align 4
@@ -1485,16 +1485,16 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_store(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_seq_cst_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_seq_cst_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out syncscope("workgroup") seq_cst, align 4
@@ -1645,15 +1645,15 @@ define amdgpu_kernel void @flat_workgroup_monotonic_atomicrmw(
; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_monotonic_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_monotonic_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("workgroup") monotonic
@@ -1823,16 +1823,16 @@ define amdgpu_kernel void @flat_workgroup_acquire_atomicrmw(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_acquire_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_acquire_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("workgroup") acquire
@@ -2000,16 +2000,16 @@ define amdgpu_kernel void @flat_workgroup_release_atomicrmw(
; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_release_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_release_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("workgroup") release
@@ -2196,17 +2196,17 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_atomicrmw(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_acq_rel_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_acq_rel_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("workgroup") acq_rel
@@ -2393,17 +2393,17 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_atomicrmw(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_seq_cst_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_seq_cst_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("workgroup") seq_cst
@@ -2610,17 +2610,17 @@ define amdgpu_kernel void @flat_workgroup_acquire_ret_atomicrmw(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_acquire_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_acquire_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("workgroup") acquire
@@ -2847,18 +2847,18 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_ret_atomicrmw(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_acq_rel_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_acq_rel_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("workgroup") acq_rel
@@ -3085,18 +3085,18 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_ret_atomicrmw(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_seq_cst_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_seq_cst_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("workgroup") seq_cst
@@ -3336,19 +3336,19 @@ define amdgpu_kernel void @flat_workgroup_monotonic_monotonic_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_monotonic_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_monotonic_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -3607,20 +3607,20 @@ define amdgpu_kernel void @flat_workgroup_acquire_monotonic_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_acquire_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_acquire_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -3877,20 +3877,20 @@ define amdgpu_kernel void @flat_workgroup_release_monotonic_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_release_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_release_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -4166,21 +4166,21 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_monotonic_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_acq_rel_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_acq_rel_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -4456,21 +4456,21 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_monotonic_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_seq_cst_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_seq_cst_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -4729,20 +4729,20 @@ define amdgpu_kernel void @flat_workgroup_monotonic_acquire_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_monotonic_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_monotonic_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -5001,20 +5001,20 @@ define amdgpu_kernel void @flat_workgroup_acquire_acquire_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_acquire_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_acquire_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -5290,21 +5290,21 @@ define amdgpu_kernel void @flat_workgroup_release_acquire_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_release_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_release_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -5580,21 +5580,21 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_acquire_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_acq_rel_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_acq_rel_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -5870,21 +5870,21 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_acquire_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_seq_cst_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_seq_cst_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -6160,21 +6160,21 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_seq_cst_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_seq_cst_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_seq_cst_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -6458,21 +6458,21 @@ define amdgpu_kernel void @flat_workgroup_monotonic_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_monotonic_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_monotonic_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -6770,21 +6770,21 @@ define amdgpu_kernel void @flat_workgroup_acquire_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_acquire_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_acquire_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -7087,22 +7087,22 @@ define amdgpu_kernel void @flat_workgroup_release_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_release_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_release_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -7419,22 +7419,22 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_acq_rel_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_acq_rel_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -7751,22 +7751,22 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_seq_cst_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_seq_cst_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -8066,21 +8066,21 @@ define amdgpu_kernel void @flat_workgroup_monotonic_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_monotonic_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_monotonic_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -8378,21 +8378,21 @@ define amdgpu_kernel void @flat_workgroup_acquire_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_acquire_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_acquire_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -8709,22 +8709,22 @@ define amdgpu_kernel void @flat_workgroup_release_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_release_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_release_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -9041,22 +9041,22 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_acq_rel_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_acq_rel_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -9373,22 +9373,22 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_seq_cst_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_seq_cst_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -9705,22 +9705,22 @@ define amdgpu_kernel void @flat_workgroup_monotonic_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_monotonic_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_monotonic_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -10035,22 +10035,22 @@ define amdgpu_kernel void @flat_workgroup_acquire_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_acquire_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_acquire_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -10367,22 +10367,22 @@ define amdgpu_kernel void @flat_workgroup_release_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_release_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_release_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -10699,22 +10699,22 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_acq_rel_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_acq_rel_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -11031,22 +11031,22 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_seq_cst_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_seq_cst_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -11232,16 +11232,16 @@ define amdgpu_kernel void @flat_workgroup_one_as_unordered_load(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_one_as_unordered_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3]
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_one_as_unordered_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3]
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in syncscope("workgroup-one-as") unordered, align 4
@@ -11425,16 +11425,16 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_load(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_one_as_monotonic_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3]
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_one_as_monotonic_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3]
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in syncscope("workgroup-one-as") monotonic, align 4
@@ -11626,16 +11626,16 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_load(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_one_as_acquire_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3]
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_one_as_acquire_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3]
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in syncscope("workgroup-one-as") acquire, align 4
@@ -11839,16 +11839,16 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_load(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_one_as_seq_cst_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3]
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_one_as_seq_cst_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3]
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic i32, ptr %in syncscope("workgroup-one-as") seq_cst, align 4
@@ -12000,15 +12000,15 @@ define amdgpu_kernel void @flat_workgroup_one_as_unordered_store(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_one_as_unordered_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_one_as_unordered_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out syncscope("workgroup-one-as") unordered, align 4
@@ -12159,15 +12159,15 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_store(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_one_as_monotonic_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_one_as_monotonic_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out syncscope("workgroup-one-as") monotonic, align 4
@@ -12328,15 +12328,15 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_store(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_one_as_release_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_one_as_release_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out syncscope("workgroup-one-as") release, align 4
@@ -12497,15 +12497,15 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_store(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_one_as_seq_cst_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_one_as_seq_cst_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic i32 %in, ptr %out syncscope("workgroup-one-as") seq_cst, align 4
@@ -12656,15 +12656,15 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_atomicrmw(
; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_one_as_monotonic_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_one_as_monotonic_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("workgroup-one-as") monotonic
@@ -12825,15 +12825,15 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_atomicrmw(
; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_one_as_acquire_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_one_as_acquire_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("workgroup-one-as") acquire
@@ -12994,15 +12994,15 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_atomicrmw(
; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_one_as_release_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_one_as_release_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("workgroup-one-as") release
@@ -13173,15 +13173,15 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_atomicrmw(
; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_one_as_acq_rel_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_one_as_acq_rel_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("workgroup-one-as") acq_rel
@@ -13352,15 +13352,15 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_atomicrmw(
; GFX12-CU-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_one_as_seq_cst_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_one_as_seq_cst_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("workgroup-one-as") seq_cst
@@ -13563,17 +13563,17 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_ret_atomicrmw(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_one_as_acquire_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_one_as_acquire_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("workgroup-one-as") acquire
@@ -13789,17 +13789,17 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_ret_atomicrmw(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_one_as_acq_rel_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_one_as_acq_rel_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("workgroup-one-as") acq_rel
@@ -14015,17 +14015,17 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_ret_atomicrmw(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_one_as_seq_cst_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_one_as_seq_cst_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr %out, i32 %in syncscope("workgroup-one-as") seq_cst
@@ -14265,19 +14265,19 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_monotonic_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_one_as_monotonic_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_one_as_monotonic_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -14527,19 +14527,19 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_monotonic_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_one_as_acquire_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_one_as_acquire_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -14789,19 +14789,19 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_monotonic_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_one_as_release_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_one_as_release_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -15061,19 +15061,19 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_monotonic_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_one_as_acq_rel_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_one_as_acq_rel_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -15333,19 +15333,19 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_monotonic_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_one_as_seq_cst_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_one_as_seq_cst_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -15595,19 +15595,19 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_acquire_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_one_as_monotonic_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_one_as_monotonic_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -15857,19 +15857,19 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_acquire_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_one_as_acquire_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_one_as_acquire_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -16129,19 +16129,19 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_acquire_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_one_as_release_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_one_as_release_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -16401,19 +16401,19 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_acquire_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_one_as_acq_rel_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_one_as_acq_rel_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -16673,19 +16673,19 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_acquire_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_one_as_seq_cst_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_one_as_seq_cst_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -16945,19 +16945,19 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_seq_cst_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_one_as_monotonic_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_one_as_monotonic_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -17217,19 +17217,19 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_seq_cst_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_one_as_acquire_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_one_as_acquire_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -17489,19 +17489,19 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_seq_cst_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_one_as_release_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_one_as_release_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -17761,19 +17761,19 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_seq_cst_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_one_as_acq_rel_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_one_as_acq_rel_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -18033,19 +18033,19 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_seq_cst_cmpxchg(
; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_one_as_seq_cst_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_one_as_seq_cst_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -18329,21 +18329,21 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonicmonotonic_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_one_as_monotonicmonotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_one_as_monotonicmonotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -18637,21 +18637,21 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_one_as_acquire_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_one_as_acquire_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -18947,21 +18947,21 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_one_as_release_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_one_as_release_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -19267,21 +19267,21 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -19587,21 +19587,21 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -19897,21 +19897,21 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_one_as_monotonic_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_one_as_monotonic_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -20205,21 +20205,21 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_one_as_acquire_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_one_as_acquire_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -20525,21 +20525,21 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_one_as_release_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_one_as_release_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -20845,21 +20845,21 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_one_as_acq_rel_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_one_as_acq_rel_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -21165,21 +21165,21 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_one_as_seq_cst_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_one_as_seq_cst_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -21485,21 +21485,21 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_one_as_monotonic_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_one_as_monotonic_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -21803,21 +21803,21 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_one_as_acquire_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_one_as_acquire_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -22123,21 +22123,21 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_one_as_release_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_one_as_release_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -22443,21 +22443,21 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_one_as_acq_rel_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_one_as_acq_rel_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
@@ -22763,21 +22763,21 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: flat_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: flat_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr %out, i32 4
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-agent.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-agent.ll
index 51859c112bf9f..184e15406bfbc 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-agent.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-agent.ll
@@ -12,7 +12,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250-CU %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250 %s
define amdgpu_kernel void @global_agent_unordered_load(
; GFX6-LABEL: global_agent_unordered_load:
@@ -192,16 +192,16 @@ define amdgpu_kernel void @global_agent_unordered_load(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_unordered_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_unordered_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in syncscope("agent") unordered, align 4
@@ -387,16 +387,16 @@ define amdgpu_kernel void @global_agent_monotonic_load(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_monotonic_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_monotonic_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in syncscope("agent") monotonic, align 4
@@ -598,17 +598,17 @@ define amdgpu_kernel void @global_agent_acquire_load(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_acquire_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_acquire_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in syncscope("agent") acquire, align 4
@@ -829,23 +829,23 @@ define amdgpu_kernel void @global_agent_seq_cst_load(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_seq_cst_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_seq_cst_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in syncscope("agent") seq_cst, align 4
@@ -1004,15 +1004,15 @@ define amdgpu_kernel void @global_agent_unordered_store(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_unordered_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_unordered_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out syncscope("agent") unordered, align 4
@@ -1170,15 +1170,15 @@ define amdgpu_kernel void @global_agent_monotonic_store(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_monotonic_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_monotonic_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out syncscope("agent") monotonic, align 4
@@ -1361,19 +1361,19 @@ define amdgpu_kernel void @global_agent_release_store(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_release_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_release_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out syncscope("agent") release, align 4
@@ -1556,19 +1556,19 @@ define amdgpu_kernel void @global_agent_seq_cst_store(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_seq_cst_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_seq_cst_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out syncscope("agent") seq_cst, align 4
@@ -1724,15 +1724,15 @@ define amdgpu_kernel void @global_agent_monotonic_atomicrmw(
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_monotonic_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_monotonic_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent") monotonic
@@ -1917,17 +1917,17 @@ define amdgpu_kernel void @global_agent_acquire_atomicrmw(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_acquire_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_acquire_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent") acquire
@@ -2108,19 +2108,19 @@ define amdgpu_kernel void @global_agent_release_atomicrmw(
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_release_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_release_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent") release
@@ -2330,21 +2330,21 @@ define amdgpu_kernel void @global_agent_acq_rel_atomicrmw(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_acq_rel_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_acq_rel_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent") acq_rel
@@ -2554,21 +2554,21 @@ define amdgpu_kernel void @global_agent_seq_cst_atomicrmw(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_seq_cst_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_seq_cst_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent") seq_cst
@@ -2768,18 +2768,18 @@ define amdgpu_kernel void @global_agent_acquire_ret_atomicrmw(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_acquire_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_acquire_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent") acquire
@@ -3009,24 +3009,24 @@ define amdgpu_kernel void @global_agent_acq_rel_ret_atomicrmw(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_acq_rel_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_acq_rel_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent") acq_rel
@@ -3256,24 +3256,24 @@ define amdgpu_kernel void @global_agent_seq_cst_ret_atomicrmw(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_seq_cst_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_seq_cst_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent") seq_cst
@@ -3494,19 +3494,19 @@ define amdgpu_kernel void @global_agent_monotonic_monotonic_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_monotonic_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_monotonic_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -3756,21 +3756,21 @@ define amdgpu_kernel void @global_agent_acquire_monotonic_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_acquire_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_acquire_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -4016,23 +4016,23 @@ define amdgpu_kernel void @global_agent_release_monotonic_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_release_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_release_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -4307,25 +4307,25 @@ define amdgpu_kernel void @global_agent_acq_rel_monotonic_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_acq_rel_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_acq_rel_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -4600,25 +4600,25 @@ define amdgpu_kernel void @global_agent_seq_cst_monotonic_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_seq_cst_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_seq_cst_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -4868,21 +4868,21 @@ define amdgpu_kernel void @global_agent_monotonic_acquire_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_monotonic_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_monotonic_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -5132,21 +5132,21 @@ define amdgpu_kernel void @global_agent_acquire_acquire_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_acquire_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_acquire_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -5421,25 +5421,25 @@ define amdgpu_kernel void @global_agent_release_acquire_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_release_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_release_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -5714,25 +5714,25 @@ define amdgpu_kernel void @global_agent_acq_rel_acquire_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_acq_rel_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_acq_rel_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -6007,25 +6007,25 @@ define amdgpu_kernel void @global_agent_seq_cst_acquire_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_seq_cst_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_seq_cst_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -6300,25 +6300,25 @@ define amdgpu_kernel void @global_agent_monotonic_seq_cst_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_monotonic_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_monotonic_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -6593,25 +6593,25 @@ define amdgpu_kernel void @global_agent_acquire_seq_cst_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_acquire_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_acquire_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -6886,25 +6886,25 @@ define amdgpu_kernel void @global_agent_release_seq_cst_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_release_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_release_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -7179,25 +7179,25 @@ define amdgpu_kernel void @global_agent_acq_rel_seq_cst_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_acq_rel_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_acq_rel_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -7472,25 +7472,25 @@ define amdgpu_kernel void @global_agent_seq_cst_seq_cst_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_seq_cst_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_seq_cst_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -7741,21 +7741,21 @@ define amdgpu_kernel void @global_agent_monotonic_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_monotonic_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_monotonic_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -8024,22 +8024,22 @@ define amdgpu_kernel void @global_agent_acquire_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_acquire_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_acquire_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -8317,25 +8317,25 @@ define amdgpu_kernel void @global_agent_release_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_release_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_release_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -8633,28 +8633,28 @@ define amdgpu_kernel void @global_agent_acq_rel_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_acq_rel_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_acq_rel_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -8952,28 +8952,28 @@ define amdgpu_kernel void @global_agent_seq_cst_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_seq_cst_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_seq_cst_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -9246,24 +9246,24 @@ define amdgpu_kernel void @global_agent_monotonic_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_monotonic_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_monotonic_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -9532,22 +9532,22 @@ define amdgpu_kernel void @global_agent_acquire_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_acquire_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_acquire_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -9845,28 +9845,28 @@ define amdgpu_kernel void @global_agent_release_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_release_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_release_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -10164,28 +10164,28 @@ define amdgpu_kernel void @global_agent_acq_rel_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_acq_rel_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_acq_rel_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -10483,28 +10483,28 @@ define amdgpu_kernel void @global_agent_seq_cst_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_seq_cst_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_seq_cst_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -10802,28 +10802,28 @@ define amdgpu_kernel void @global_agent_monotonic_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_monotonic_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_monotonic_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -11117,26 +11117,26 @@ define amdgpu_kernel void @global_agent_acquire_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_acquire_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_acquire_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -11434,28 +11434,28 @@ define amdgpu_kernel void @global_agent_release_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_release_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_release_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -11753,28 +11753,28 @@ define amdgpu_kernel void @global_agent_acq_rel_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_acq_rel_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_acq_rel_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -12072,28 +12072,28 @@ define amdgpu_kernel void @global_agent_seq_cst_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_seq_cst_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_seq_cst_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -12281,16 +12281,16 @@ define amdgpu_kernel void @global_agent_one_as_unordered_load(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_one_as_unordered_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_one_as_unordered_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in syncscope("agent-one-as") unordered, align 4
@@ -12476,16 +12476,16 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_load(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_one_as_monotonic_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_one_as_monotonic_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in syncscope("agent-one-as") monotonic, align 4
@@ -12687,17 +12687,17 @@ define amdgpu_kernel void @global_agent_one_as_acquire_load(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_one_as_acquire_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_one_as_acquire_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in syncscope("agent-one-as") acquire, align 4
@@ -12918,23 +12918,23 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_load(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_one_as_seq_cst_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_one_as_seq_cst_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in syncscope("agent-one-as") seq_cst, align 4
@@ -13093,15 +13093,15 @@ define amdgpu_kernel void @global_agent_one_as_unordered_store(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_one_as_unordered_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_one_as_unordered_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out syncscope("agent-one-as") unordered, align 4
@@ -13259,15 +13259,15 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_store(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_one_as_monotonic_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_one_as_monotonic_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out syncscope("agent-one-as") monotonic, align 4
@@ -13450,19 +13450,19 @@ define amdgpu_kernel void @global_agent_one_as_release_store(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_one_as_release_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_one_as_release_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out syncscope("agent-one-as") release, align 4
@@ -13645,19 +13645,19 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_store(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_one_as_seq_cst_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_one_as_seq_cst_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out syncscope("agent-one-as") seq_cst, align 4
@@ -13813,15 +13813,15 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_atomicrmw(
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_one_as_monotonic_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_one_as_monotonic_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent-one-as") monotonic
@@ -14006,17 +14006,17 @@ define amdgpu_kernel void @global_agent_one_as_acquire_atomicrmw(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_one_as_acquire_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_one_as_acquire_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent-one-as") acquire
@@ -14197,19 +14197,19 @@ define amdgpu_kernel void @global_agent_one_as_release_atomicrmw(
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_one_as_release_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_one_as_release_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent-one-as") release
@@ -14419,21 +14419,21 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_atomicrmw(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_one_as_acq_rel_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_one_as_acq_rel_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent-one-as") acq_rel
@@ -14643,21 +14643,21 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_atomicrmw(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_one_as_seq_cst_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_one_as_seq_cst_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent-one-as") seq_cst
@@ -14857,18 +14857,18 @@ define amdgpu_kernel void @global_agent_one_as_acquire_ret_atomicrmw(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_one_as_acquire_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_one_as_acquire_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent-one-as") acquire
@@ -15098,24 +15098,24 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_ret_atomicrmw(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_one_as_acq_rel_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_one_as_acq_rel_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent-one-as") acq_rel
@@ -15345,24 +15345,24 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_ret_atomicrmw(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_one_as_seq_cst_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_one_as_seq_cst_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent-one-as") seq_cst
@@ -15583,19 +15583,19 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_monotonic_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_one_as_monotonic_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_one_as_monotonic_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -15845,21 +15845,21 @@ define amdgpu_kernel void @global_agent_one_as_acquire_monotonic_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_one_as_acquire_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_one_as_acquire_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -16105,23 +16105,23 @@ define amdgpu_kernel void @global_agent_one_as_release_monotonic_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_one_as_release_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_one_as_release_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -16396,25 +16396,25 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_monotonic_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_one_as_acq_rel_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_one_as_acq_rel_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -16689,25 +16689,25 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_monotonic_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_one_as_seq_cst_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_one_as_seq_cst_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -16957,21 +16957,21 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_acquire_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_one_as_monotonic_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_one_as_monotonic_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -17221,21 +17221,21 @@ define amdgpu_kernel void @global_agent_one_as_acquire_acquire_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_one_as_acquire_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_one_as_acquire_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -17510,25 +17510,25 @@ define amdgpu_kernel void @global_agent_one_as_release_acquire_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_one_as_release_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_one_as_release_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -17803,25 +17803,25 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_acquire_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_one_as_acq_rel_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_one_as_acq_rel_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -18096,25 +18096,25 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_acquire_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_one_as_seq_cst_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_one_as_seq_cst_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -18389,25 +18389,25 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_seq_cst_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_one_as_monotonic_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_one_as_monotonic_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -18682,25 +18682,25 @@ define amdgpu_kernel void @global_agent_one_as_acquire_seq_cst_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_one_as_acquire_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_one_as_acquire_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -18975,25 +18975,25 @@ define amdgpu_kernel void @global_agent_one_as_release_seq_cst_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_one_as_release_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_one_as_release_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -19268,25 +19268,25 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_seq_cst_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_one_as_acq_rel_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_one_as_acq_rel_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -19561,25 +19561,25 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_seq_cst_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_one_as_seq_cst_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_one_as_seq_cst_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -19830,21 +19830,21 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_one_as_monotonic_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_one_as_monotonic_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -20113,22 +20113,22 @@ define amdgpu_kernel void @global_agent_one_as_acquire_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_one_as_acquire_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_one_as_acquire_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -20426,28 +20426,28 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_one_as_acq_rel_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_one_as_acq_rel_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -20745,28 +20745,28 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_one_as_seq_cst_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_one_as_seq_cst_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -21039,24 +21039,24 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_one_as_monotonic_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_one_as_monotonic_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -21325,22 +21325,22 @@ define amdgpu_kernel void @global_agent_one_as_acquire_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_one_as_acquire_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_one_as_acquire_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -21638,28 +21638,28 @@ define amdgpu_kernel void @global_agent_one_as_release_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_one_as_release_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_one_as_release_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -21957,28 +21957,28 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_one_as_acq_rel_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_one_as_acq_rel_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -22276,28 +22276,28 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_one_as_seq_cst_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_one_as_seq_cst_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -22595,28 +22595,28 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_one_as_monotonic_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_one_as_monotonic_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -22910,26 +22910,26 @@ define amdgpu_kernel void @global_agent_one_as_acquire_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_one_as_acquire_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_one_as_acquire_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -23227,28 +23227,28 @@ define amdgpu_kernel void @global_agent_one_as_release_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_one_as_release_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_one_as_release_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -23546,28 +23546,28 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_one_as_acq_rel_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_one_as_acq_rel_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -23865,28 +23865,28 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_agent_one_as_seq_cst_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_agent_one_as_seq_cst_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_DEV
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-lastuse.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-lastuse.ll
index 2c8fa9da74862..ed2d62356f8f2 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-lastuse.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-lastuse.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefix=GFX12 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefix=GFX12 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250-CU %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250 %s
define amdgpu_kernel void @global_last_use_load_0(ptr addrspace(1) %in, ptr addrspace(1) %out) {
; GFX12-LABEL: global_last_use_load_0:
@@ -16,17 +16,17 @@ define amdgpu_kernel void @global_last_use_load_0(ptr addrspace(1) %in, ptr addr
; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_last_use_load_0:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[2:3], 0x0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_last_use_load_0:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[2:3], 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
entry:
%val = load i32, ptr addrspace(1) %in, align 4, !amdgpu.last.use !{}
store i32 %val, ptr addrspace(1) %out
@@ -51,20 +51,20 @@ define amdgpu_kernel void @global_last_use_load_1(ptr addrspace(1) %in, ptr addr
; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_last_use_load_1:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, v0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_xcnt 0x0
-; GFX1250-CU-NEXT: s_mov_b32 s4, 0x3ff
-; GFX1250-CU-NEXT: v_and_b32_e64 v1, v1, s4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: global_load_b32 v1, v1, s[2:3] scale_offset th:TH_LOAD_LU
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_last_use_load_1:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v1, v0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_mov_b32 s4, 0x3ff
+; GFX1250-NEXT: v_and_b32_e64 v1, v1, s4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_load_b32 v1, v1, s[2:3] scale_offset th:TH_LOAD_LU
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%val.gep = getelementptr inbounds i32, ptr addrspace(1) %in, i32 %tid
@@ -87,18 +87,18 @@ define amdgpu_kernel void @global_last_use_and_volatile_load(ptr addrspace(1) %i
; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_last_use_and_volatile_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3] th:TH_LOAD_BYPASS scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_last_use_and_volatile_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3] th:TH_LOAD_BYPASS scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
entry:
%val = load volatile i32, ptr addrspace(1) %in, align 4, !amdgpu.last.use !{}
store i32 %val, ptr addrspace(1) %out
@@ -123,20 +123,20 @@ define amdgpu_kernel void @global_last_use_and_nontemporal_load(ptr addrspace(1)
; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_last_use_and_nontemporal_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, v0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_xcnt 0x0
-; GFX1250-CU-NEXT: s_mov_b32 s4, 0x3ff
-; GFX1250-CU-NEXT: v_and_b32_e64 v1, v1, s4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: global_load_b32 v1, v1, s[2:3] scale_offset th:TH_LOAD_LU
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_last_use_and_nontemporal_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v1, v0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_mov_b32 s4, 0x3ff
+; GFX1250-NEXT: v_and_b32_e64 v1, v1, s4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_load_b32 v1, v1, s[2:3] scale_offset th:TH_LOAD_LU
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%val.gep = getelementptr inbounds i32, ptr addrspace(1) %in, i32 %tid
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-nontemporal.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-nontemporal.ll
index e73300dbc5ac6..0ad64f5599fe7 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-nontemporal.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-nontemporal.ll
@@ -12,7 +12,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250-CU %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250 %s
define amdgpu_kernel void @global_nontemporal_load_0(
; GFX6-LABEL: global_nontemporal_load_0:
@@ -191,17 +191,17 @@ define amdgpu_kernel void @global_nontemporal_load_0(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_nontemporal_load_0:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[2:3], 0x0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_nontemporal_load_0:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[2:3], 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load i32, ptr addrspace(1) %in, align 4, !nontemporal !0
@@ -462,20 +462,20 @@ define amdgpu_kernel void @global_nontemporal_load_1(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_nontemporal_load_1:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, v0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_xcnt 0x0
-; GFX1250-CU-NEXT: s_mov_b32 s4, 0x3ff
-; GFX1250-CU-NEXT: v_and_b32_e64 v1, v1, s4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: global_load_b32 v1, v1, s[2:3] scale_offset th:TH_LOAD_NT
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_nontemporal_load_1:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v1, v0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_mov_b32 s4, 0x3ff
+; GFX1250-NEXT: v_and_b32_e64 v1, v1, s4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_load_b32 v1, v1, s[2:3] scale_offset th:TH_LOAD_NT
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -662,17 +662,17 @@ define amdgpu_kernel void @global_nontemporal_store_0(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] th:TH_STORE_NT
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_nontemporal_store_0:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[2:3], 0x0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1] th:TH_STORE_NT
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_nontemporal_store_0:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[2:3], 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] th:TH_STORE_NT
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load i32, ptr addrspace(1) %in, align 4
@@ -907,19 +907,19 @@ define amdgpu_kernel void @global_nontemporal_store_1(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] th:TH_STORE_NT
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_nontemporal_store_1:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[2:3], 0x0
-; GFX1250-CU-NEXT: s_wait_xcnt 0x0
-; GFX1250-CU-NEXT: s_mov_b32 s3, 0x3ff
-; GFX1250-CU-NEXT: v_and_b32_e64 v0, v0, s3
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1] scale_offset th:TH_STORE_NT
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_nontemporal_store_1:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[2:3], 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_mov_b32 s3, 0x3ff
+; GFX1250-NEXT: v_and_b32_e64 v0, v0, s3
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] scale_offset th:TH_STORE_NT
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -1111,18 +1111,18 @@ define amdgpu_kernel void @global_nontemporal_volatile_load(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_nontemporal_volatile_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3] th:TH_LOAD_NT scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_nontemporal_volatile_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3] th:TH_LOAD_NT scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load volatile i32, ptr addrspace(1) %in, align 4, !nontemporal !0
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-singlethread.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-singlethread.ll
index 2633bba70ddd3..6a5a6e01c741b 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-singlethread.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-singlethread.ll
@@ -12,7 +12,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250-CU %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250 %s
define amdgpu_kernel void @global_singlethread_unordered_load(
; GFX6-LABEL: global_singlethread_unordered_load:
@@ -192,16 +192,16 @@ define amdgpu_kernel void @global_singlethread_unordered_load(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_unordered_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_unordered_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in syncscope("singlethread") unordered, align 4
@@ -387,16 +387,16 @@ define amdgpu_kernel void @global_singlethread_monotonic_load(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_monotonic_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_monotonic_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in syncscope("singlethread") monotonic, align 4
@@ -582,16 +582,16 @@ define amdgpu_kernel void @global_singlethread_acquire_load(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_acquire_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_acquire_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in syncscope("singlethread") acquire, align 4
@@ -777,16 +777,16 @@ define amdgpu_kernel void @global_singlethread_seq_cst_load(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_seq_cst_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_seq_cst_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in syncscope("singlethread") seq_cst, align 4
@@ -945,15 +945,15 @@ define amdgpu_kernel void @global_singlethread_unordered_store(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_unordered_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_unordered_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out syncscope("singlethread") unordered, align 4
@@ -1111,15 +1111,15 @@ define amdgpu_kernel void @global_singlethread_monotonic_store(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_monotonic_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_monotonic_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out syncscope("singlethread") monotonic, align 4
@@ -1277,15 +1277,15 @@ define amdgpu_kernel void @global_singlethread_release_store(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_release_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_release_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out syncscope("singlethread") release, align 4
@@ -1443,15 +1443,15 @@ define amdgpu_kernel void @global_singlethread_seq_cst_store(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_seq_cst_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_seq_cst_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out syncscope("singlethread") seq_cst, align 4
@@ -1607,15 +1607,15 @@ define amdgpu_kernel void @global_singlethread_monotonic_atomicrmw(
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_monotonic_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_monotonic_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("singlethread") monotonic
@@ -1771,15 +1771,15 @@ define amdgpu_kernel void @global_singlethread_acquire_atomicrmw(
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_acquire_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_acquire_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("singlethread") acquire
@@ -1935,15 +1935,15 @@ define amdgpu_kernel void @global_singlethread_release_atomicrmw(
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_release_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_release_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("singlethread") release
@@ -2099,15 +2099,15 @@ define amdgpu_kernel void @global_singlethread_acq_rel_atomicrmw(
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_acq_rel_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_acq_rel_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("singlethread") acq_rel
@@ -2263,15 +2263,15 @@ define amdgpu_kernel void @global_singlethread_seq_cst_atomicrmw(
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_seq_cst_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_seq_cst_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("singlethread") seq_cst
@@ -2455,17 +2455,17 @@ define amdgpu_kernel void @global_singlethread_acquire_ret_atomicrmw(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_acquire_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_acquire_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("singlethread") acquire
@@ -2650,17 +2650,17 @@ define amdgpu_kernel void @global_singlethread_acq_rel_ret_atomicrmw(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_acq_rel_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_acq_rel_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("singlethread") acq_rel
@@ -2845,17 +2845,17 @@ define amdgpu_kernel void @global_singlethread_seq_cst_ret_atomicrmw(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_seq_cst_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_seq_cst_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("singlethread") seq_cst
@@ -3076,19 +3076,19 @@ define amdgpu_kernel void @global_singlethread_monotonic_monotonic_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_monotonic_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_monotonic_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -3309,19 +3309,19 @@ define amdgpu_kernel void @global_singlethread_acquire_monotonic_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_acquire_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_acquire_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -3542,19 +3542,19 @@ define amdgpu_kernel void @global_singlethread_release_monotonic_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_release_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_release_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -3775,19 +3775,19 @@ define amdgpu_kernel void @global_singlethread_acq_rel_monotonic_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_acq_rel_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_acq_rel_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -4008,19 +4008,19 @@ define amdgpu_kernel void @global_singlethread_seq_cst_monotonic_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_seq_cst_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_seq_cst_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -4241,19 +4241,19 @@ define amdgpu_kernel void @global_singlethread_monotonic_acquire_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_monotonic_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_monotonic_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -4474,19 +4474,19 @@ define amdgpu_kernel void @global_singlethread_acquire_acquire_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_acquire_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_acquire_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -4707,19 +4707,19 @@ define amdgpu_kernel void @global_singlethread_release_acquire_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_release_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_release_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -4940,19 +4940,19 @@ define amdgpu_kernel void @global_singlethread_acq_rel_acquire_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_acq_rel_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_acq_rel_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -5173,19 +5173,19 @@ define amdgpu_kernel void @global_singlethread_seq_cst_acquire_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_seq_cst_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_seq_cst_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -5406,19 +5406,19 @@ define amdgpu_kernel void @global_singlethread_monotonic_seq_cst_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_monotonic_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_monotonic_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -5639,19 +5639,19 @@ define amdgpu_kernel void @global_singlethread_acquire_seq_cst_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_acquire_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_acquire_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -5872,19 +5872,19 @@ define amdgpu_kernel void @global_singlethread_release_seq_cst_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_release_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_release_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -6105,19 +6105,19 @@ define amdgpu_kernel void @global_singlethread_acq_rel_seq_cst_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_acq_rel_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_acq_rel_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -6338,19 +6338,19 @@ define amdgpu_kernel void @global_singlethread_seq_cst_seq_cst_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_seq_cst_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_seq_cst_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -6601,21 +6601,21 @@ define amdgpu_kernel void @global_singlethread_monotonic_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_monotonic_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_monotonic_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -6868,21 +6868,21 @@ define amdgpu_kernel void @global_singlethread_acquire_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_acquire_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_acquire_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -7135,21 +7135,21 @@ define amdgpu_kernel void @global_singlethread_release_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_release_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_release_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -7402,21 +7402,21 @@ define amdgpu_kernel void @global_singlethread_acq_rel_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_acq_rel_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_acq_rel_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -7669,21 +7669,21 @@ define amdgpu_kernel void @global_singlethread_seq_cst_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_seq_cst_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_seq_cst_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -7936,21 +7936,21 @@ define amdgpu_kernel void @global_singlethread_monotonic_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_monotonic_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_monotonic_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -8203,21 +8203,21 @@ define amdgpu_kernel void @global_singlethread_acquire_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_acquire_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_acquire_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -8470,21 +8470,21 @@ define amdgpu_kernel void @global_singlethread_release_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_release_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_release_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -8737,21 +8737,21 @@ define amdgpu_kernel void @global_singlethread_acq_rel_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_acq_rel_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_acq_rel_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -9004,21 +9004,21 @@ define amdgpu_kernel void @global_singlethread_seq_cst_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_seq_cst_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_seq_cst_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -9271,21 +9271,21 @@ define amdgpu_kernel void @global_singlethread_monotonic_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_monotonic_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_monotonic_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -9538,21 +9538,21 @@ define amdgpu_kernel void @global_singlethread_acquire_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_acquire_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_acquire_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -9805,21 +9805,21 @@ define amdgpu_kernel void @global_singlethread_release_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_release_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_release_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -10072,21 +10072,21 @@ define amdgpu_kernel void @global_singlethread_acq_rel_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_acq_rel_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_acq_rel_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -10339,21 +10339,21 @@ define amdgpu_kernel void @global_singlethread_seq_cst_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_seq_cst_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_seq_cst_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -10541,16 +10541,16 @@ define amdgpu_kernel void @global_singlethread_one_as_unordered_load(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_one_as_unordered_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_one_as_unordered_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in syncscope("singlethread-one-as") unordered, align 4
@@ -10736,16 +10736,16 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_load(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_one_as_monotonic_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_one_as_monotonic_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in syncscope("singlethread-one-as") monotonic, align 4
@@ -10931,16 +10931,16 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_load(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_one_as_acquire_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_one_as_acquire_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in syncscope("singlethread-one-as") acquire, align 4
@@ -11126,16 +11126,16 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_load(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_one_as_seq_cst_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_one_as_seq_cst_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in syncscope("singlethread-one-as") seq_cst, align 4
@@ -11294,15 +11294,15 @@ define amdgpu_kernel void @global_singlethread_one_as_unordered_store(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_one_as_unordered_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_one_as_unordered_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out syncscope("singlethread-one-as") unordered, align 4
@@ -11460,15 +11460,15 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_store(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_one_as_monotonic_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_one_as_monotonic_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out syncscope("singlethread-one-as") monotonic, align 4
@@ -11626,15 +11626,15 @@ define amdgpu_kernel void @global_singlethread_one_as_release_store(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_one_as_release_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_one_as_release_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out syncscope("singlethread-one-as") release, align 4
@@ -11792,15 +11792,15 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_store(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_one_as_seq_cst_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_one_as_seq_cst_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out syncscope("singlethread-one-as") seq_cst, align 4
@@ -11956,15 +11956,15 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_atomicrmw(
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_one_as_monotonic_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_one_as_monotonic_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("singlethread-one-as") monotonic
@@ -12120,15 +12120,15 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_atomicrmw(
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_one_as_acquire_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_one_as_acquire_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("singlethread-one-as") acquire
@@ -12284,15 +12284,15 @@ define amdgpu_kernel void @global_singlethread_one_as_release_atomicrmw(
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_one_as_release_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_one_as_release_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("singlethread-one-as") release
@@ -12448,15 +12448,15 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_atomicrmw(
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_one_as_acq_rel_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_one_as_acq_rel_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("singlethread-one-as") acq_rel
@@ -12612,15 +12612,15 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_atomicrmw(
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_one_as_seq_cst_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_one_as_seq_cst_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("singlethread-one-as") seq_cst
@@ -12804,17 +12804,17 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_ret_atomicrmw(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_one_as_acquire_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_one_as_acquire_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("singlethread-one-as") acquire
@@ -12999,17 +12999,17 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_ret_atomicrmw(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_one_as_acq_rel_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_one_as_acq_rel_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("singlethread-one-as") acq_rel
@@ -13194,17 +13194,17 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_ret_atomicrmw(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_one_as_seq_cst_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_one_as_seq_cst_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("singlethread-one-as") seq_cst
@@ -13425,19 +13425,19 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_monotonic_cmpxch
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_one_as_monotonic_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_one_as_monotonic_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -13658,19 +13658,19 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_monotonic_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_one_as_acquire_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_one_as_acquire_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -13891,19 +13891,19 @@ define amdgpu_kernel void @global_singlethread_one_as_release_monotonic_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_one_as_release_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_one_as_release_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -14124,19 +14124,19 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_monotonic_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_one_as_acq_rel_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_one_as_acq_rel_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -14357,19 +14357,19 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_monotonic_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_one_as_seq_cst_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_one_as_seq_cst_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -14590,19 +14590,19 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_acquire_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_one_as_monotonic_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_one_as_monotonic_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -14823,19 +14823,19 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_acquire_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_one_as_acquire_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_one_as_acquire_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -15056,19 +15056,19 @@ define amdgpu_kernel void @global_singlethread_one_as_release_acquire_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_one_as_release_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_one_as_release_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -15289,19 +15289,19 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_acquire_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_one_as_acq_rel_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_one_as_acq_rel_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -15522,19 +15522,19 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_acquire_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_one_as_seq_cst_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_one_as_seq_cst_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -15755,19 +15755,19 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_seq_cst_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_one_as_monotonic_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_one_as_monotonic_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -15988,19 +15988,19 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_seq_cst_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_one_as_acquire_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_one_as_acquire_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -16221,19 +16221,19 @@ define amdgpu_kernel void @global_singlethread_one_as_release_seq_cst_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_one_as_release_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_one_as_release_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -16454,19 +16454,19 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_seq_cst_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_one_as_acq_rel_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_one_as_acq_rel_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -16687,19 +16687,19 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_seq_cst_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_one_as_seq_cst_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_one_as_seq_cst_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -16950,21 +16950,21 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_monotonic_ret_cm
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_one_as_monotonic_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_one_as_monotonic_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -17217,21 +17217,21 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_monotonic_ret_cmpx
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_one_as_acquire_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_one_as_acquire_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -17484,21 +17484,21 @@ define amdgpu_kernel void @global_singlethread_one_as_release_monotonic_ret_cmpx
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_one_as_release_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_one_as_release_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -17751,21 +17751,21 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_monotonic_ret_cmpx
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_one_as_acq_rel_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_one_as_acq_rel_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -18018,21 +18018,21 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_monotonic_ret_cmpx
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_one_as_seq_cst_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_one_as_seq_cst_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -18285,21 +18285,21 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_acquire_ret_cmpx
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_one_as_monotonic_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_one_as_monotonic_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -18552,21 +18552,21 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_acquire_ret_cmpxch
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_one_as_acquire_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_one_as_acquire_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -18819,21 +18819,21 @@ define amdgpu_kernel void @global_singlethread_one_as_release_acquire_ret_cmpxch
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_one_as_release_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_one_as_release_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -19086,21 +19086,21 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_acquire_ret_cmpxch
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_one_as_acq_rel_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_one_as_acq_rel_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -19353,21 +19353,21 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_acquire_ret_cmpxch
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_one_as_seq_cst_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_one_as_seq_cst_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -19620,21 +19620,21 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_seq_cst_ret_cmpx
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_one_as_monotonic_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_one_as_monotonic_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -19887,21 +19887,21 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_seq_cst_ret_cmpxch
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_one_as_acquire_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_one_as_acquire_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -20154,21 +20154,21 @@ define amdgpu_kernel void @global_singlethread_one_as_release_seq_cst_ret_cmpxch
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_one_as_release_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_one_as_release_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -20421,21 +20421,21 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_seq_cst_ret_cmpxch
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_one_as_acq_rel_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_one_as_acq_rel_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -20688,21 +20688,21 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_seq_cst_ret_cmpxch
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_singlethread_one_as_seq_cst_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_singlethread_one_as_seq_cst_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-system.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-system.ll
index c194b49f25255..7ddd515830e11 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-system.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-system.ll
@@ -12,7 +12,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250-CU %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250 %s
define amdgpu_kernel void @global_system_unordered_load(
; GFX6-LABEL: global_system_unordered_load:
@@ -192,16 +192,16 @@ define amdgpu_kernel void @global_system_unordered_load(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_unordered_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_unordered_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in unordered, align 4
@@ -387,16 +387,16 @@ define amdgpu_kernel void @global_system_monotonic_load(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_monotonic_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_monotonic_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in monotonic, align 4
@@ -600,17 +600,17 @@ define amdgpu_kernel void @global_system_acquire_load(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_acquire_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_acquire_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in acquire, align 4
@@ -833,23 +833,23 @@ define amdgpu_kernel void @global_system_seq_cst_load(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_seq_cst_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_seq_cst_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in seq_cst, align 4
@@ -1008,15 +1008,15 @@ define amdgpu_kernel void @global_system_unordered_store(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_unordered_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_unordered_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out unordered, align 4
@@ -1174,15 +1174,15 @@ define amdgpu_kernel void @global_system_monotonic_store(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_monotonic_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_monotonic_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out monotonic, align 4
@@ -1369,20 +1369,20 @@ define amdgpu_kernel void @global_system_release_store(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_release_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_release_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out release, align 4
@@ -1569,20 +1569,20 @@ define amdgpu_kernel void @global_system_seq_cst_store(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_seq_cst_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_seq_cst_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out seq_cst, align 4
@@ -1738,15 +1738,15 @@ define amdgpu_kernel void @global_system_monotonic_atomicrmw(
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_monotonic_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_monotonic_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in monotonic
@@ -1933,17 +1933,17 @@ define amdgpu_kernel void @global_system_acquire_atomicrmw(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_acquire_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_acquire_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in acquire
@@ -2128,20 +2128,20 @@ define amdgpu_kernel void @global_system_release_atomicrmw(
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_release_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_release_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in release
@@ -2357,22 +2357,22 @@ define amdgpu_kernel void @global_system_acq_rel_atomicrmw(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_acq_rel_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_acq_rel_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in acq_rel
@@ -2588,22 +2588,22 @@ define amdgpu_kernel void @global_system_seq_cst_atomicrmw(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_seq_cst_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_seq_cst_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in seq_cst
@@ -2805,18 +2805,18 @@ define amdgpu_kernel void @global_system_acquire_ret_atomicrmw(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_acquire_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_acquire_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in acquire
@@ -3052,25 +3052,25 @@ define amdgpu_kernel void @global_system_acq_rel_ret_atomicrmw(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_acq_rel_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_acq_rel_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in acq_rel
@@ -3306,25 +3306,25 @@ define amdgpu_kernel void @global_system_seq_cst_ret_atomicrmw(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_seq_cst_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_seq_cst_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in seq_cst
@@ -3545,19 +3545,19 @@ define amdgpu_kernel void @global_system_monotonic_monotonic_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_monotonic_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_monotonic_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -3809,21 +3809,21 @@ define amdgpu_kernel void @global_system_acquire_monotonic_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_acquire_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_acquire_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -4073,24 +4073,24 @@ define amdgpu_kernel void @global_system_release_monotonic_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_release_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_release_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -4371,26 +4371,26 @@ define amdgpu_kernel void @global_system_acq_rel_monotonic_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_acq_rel_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_acq_rel_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -4671,26 +4671,26 @@ define amdgpu_kernel void @global_system_seq_cst_monotonic_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_seq_cst_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_seq_cst_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -4942,21 +4942,21 @@ define amdgpu_kernel void @global_system_monotonic_acquire_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_monotonic_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_monotonic_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -5208,21 +5208,21 @@ define amdgpu_kernel void @global_system_acquire_acquire_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_acquire_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_acquire_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -5503,26 +5503,26 @@ define amdgpu_kernel void @global_system_release_acquire_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_release_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_release_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -5803,26 +5803,26 @@ define amdgpu_kernel void @global_system_acq_rel_acquire_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_acq_rel_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_acq_rel_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -6103,26 +6103,26 @@ define amdgpu_kernel void @global_system_seq_cst_acquire_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_seq_cst_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_seq_cst_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -6403,26 +6403,26 @@ define amdgpu_kernel void @global_system_seq_cst_seq_cst_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_seq_cst_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_seq_cst_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -6673,21 +6673,21 @@ define amdgpu_kernel void @global_system_monotonic_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_monotonic_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_monotonic_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -6958,22 +6958,22 @@ define amdgpu_kernel void @global_system_acquire_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_acquire_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_acquire_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -7277,29 +7277,29 @@ define amdgpu_kernel void @global_system_acq_rel_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_acq_rel_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_acq_rel_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -7603,29 +7603,29 @@ define amdgpu_kernel void @global_system_seq_cst_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_seq_cst_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_seq_cst_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -7900,24 +7900,24 @@ define amdgpu_kernel void @global_system_monotonic_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_monotonic_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_monotonic_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -8188,22 +8188,22 @@ define amdgpu_kernel void @global_system_acquire_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_acquire_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_acquire_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -8507,29 +8507,29 @@ define amdgpu_kernel void @global_system_release_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_release_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_release_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -8833,29 +8833,29 @@ define amdgpu_kernel void @global_system_acq_rel_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_acq_rel_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_acq_rel_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -9159,29 +9159,29 @@ define amdgpu_kernel void @global_system_seq_cst_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_seq_cst_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_seq_cst_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -9485,29 +9485,29 @@ define amdgpu_kernel void @global_system_monotonic_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_monotonic_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_monotonic_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -9807,27 +9807,27 @@ define amdgpu_kernel void @global_system_acquire_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_acquire_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_acquire_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -10131,29 +10131,29 @@ define amdgpu_kernel void @global_system_relese_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_relese_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_relese_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -10457,29 +10457,29 @@ define amdgpu_kernel void @global_system_acq_rel_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_acq_rel_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_acq_rel_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -10783,29 +10783,29 @@ define amdgpu_kernel void @global_system_seq_cst_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_seq_cst_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_seq_cst_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -10993,16 +10993,16 @@ define amdgpu_kernel void @global_system_one_as_unordered_load(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_one_as_unordered_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_one_as_unordered_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in syncscope("one-as") unordered, align 4
@@ -11188,16 +11188,16 @@ define amdgpu_kernel void @global_system_one_as_monotonic_load(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_one_as_monotonic_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_one_as_monotonic_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in syncscope("one-as") monotonic, align 4
@@ -11401,17 +11401,17 @@ define amdgpu_kernel void @global_system_one_as_acquire_load(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_one_as_acquire_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_one_as_acquire_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in syncscope("one-as") acquire, align 4
@@ -11634,23 +11634,23 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_load(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_one_as_seq_cst_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_one_as_seq_cst_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in syncscope("one-as") seq_cst, align 4
@@ -11809,15 +11809,15 @@ define amdgpu_kernel void @global_system_one_as_unordered_store(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_one_as_unordered_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_one_as_unordered_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out syncscope("one-as") unordered, align 4
@@ -11975,15 +11975,15 @@ define amdgpu_kernel void @global_system_one_as_monotonic_store(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_one_as_monotonic_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_one_as_monotonic_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out syncscope("one-as") monotonic, align 4
@@ -12170,20 +12170,20 @@ define amdgpu_kernel void @global_system_one_as_release_store(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_one_as_release_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_one_as_release_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out syncscope("one-as") release, align 4
@@ -12370,20 +12370,20 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_store(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_one_as_seq_cst_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_one_as_seq_cst_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out syncscope("one-as") seq_cst, align 4
@@ -12539,15 +12539,15 @@ define amdgpu_kernel void @global_system_one_as_monotonic_atomicrmw(
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_one_as_monotonic_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_one_as_monotonic_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("one-as") monotonic
@@ -12734,17 +12734,17 @@ define amdgpu_kernel void @global_system_one_as_acquire_atomicrmw(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_one_as_acquire_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_one_as_acquire_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("one-as") acquire
@@ -12929,20 +12929,20 @@ define amdgpu_kernel void @global_system_one_as_release_atomicrmw(
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_one_as_release_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_one_as_release_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("one-as") release
@@ -13158,22 +13158,22 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_atomicrmw(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_one_as_acq_rel_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_one_as_acq_rel_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("one-as") acq_rel
@@ -13389,22 +13389,22 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_atomicrmw(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_one_as_seq_cst_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_one_as_seq_cst_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("one-as") seq_cst
@@ -13606,18 +13606,18 @@ define amdgpu_kernel void @global_system_one_as_acquire_ret_atomicrmw(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_one_as_acquire_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_one_as_acquire_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("one-as") acquire
@@ -13853,25 +13853,25 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_ret_atomicrmw(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_one_as_acq_rel_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_one_as_acq_rel_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("one-as") acq_rel
@@ -14107,25 +14107,25 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_ret_atomicrmw(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_one_as_seq_cst_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_one_as_seq_cst_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("one-as") seq_cst
@@ -14346,19 +14346,19 @@ define amdgpu_kernel void @global_system_one_as_monotonic_monotonic_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_one_as_monotonic_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_one_as_monotonic_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -14610,21 +14610,21 @@ define amdgpu_kernel void @global_system_one_as_acquire_monotonic_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_one_as_acquire_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_one_as_acquire_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -14874,24 +14874,24 @@ define amdgpu_kernel void @global_system_one_as_release_monotonic_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_one_as_release_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_one_as_release_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -15172,26 +15172,26 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_monotonic_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_one_as_acq_rel_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_one_as_acq_rel_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -15472,26 +15472,26 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_monotonic_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_one_as_seq_cst_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_one_as_seq_cst_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -15743,21 +15743,21 @@ define amdgpu_kernel void @global_system_one_as_monotonic_acquire_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_one_as_monotonic_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_one_as_monotonic_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -16009,21 +16009,21 @@ define amdgpu_kernel void @global_system_one_as_acquire_acquire_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_one_as_acquire_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_one_as_acquire_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -16304,26 +16304,26 @@ define amdgpu_kernel void @global_system_one_as_release_acquire_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_one_as_release_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_one_as_release_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -16604,26 +16604,26 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_acquire_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_one_as_acq_rel_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_one_as_acq_rel_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -16904,26 +16904,26 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_acquire_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_one_as_seq_cst_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_one_as_seq_cst_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -17204,26 +17204,26 @@ define amdgpu_kernel void @global_system_one_as_monotonic_seq_cst_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_one_as_monotonic_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_one_as_monotonic_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -17504,26 +17504,26 @@ define amdgpu_kernel void @global_system_one_as_acquire_seq_cst_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_one_as_acquire_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_one_as_acquire_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -17804,26 +17804,26 @@ define amdgpu_kernel void @global_system_one_as_release_seq_cst_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_one_as_release_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_one_as_release_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -18104,26 +18104,26 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_seq_cst_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_one_as_acq_rel_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_one_as_acq_rel_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -18404,26 +18404,26 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_seq_cst_cmpxchg(
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_one_as_seq_cst_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_one_as_seq_cst_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -18674,21 +18674,21 @@ define amdgpu_kernel void @global_system_one_as_monotonic_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_one_as_monotonic_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_one_as_monotonic_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -18959,22 +18959,22 @@ define amdgpu_kernel void @global_system_one_as_acquire_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_one_as_acquire_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_one_as_acquire_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -19256,26 +19256,26 @@ define amdgpu_kernel void @global_system_one_as_release_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_one_as_release_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_one_as_release_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -19579,29 +19579,29 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_one_as_acq_rel_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_one_as_acq_rel_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -19905,29 +19905,29 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_one_as_seq_cst_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_one_as_seq_cst_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -20202,24 +20202,24 @@ define amdgpu_kernel void @global_system_one_as_monotonic_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_one_as_monotonic_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_one_as_monotonic_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -20490,22 +20490,22 @@ define amdgpu_kernel void @global_system_one_as_acquire_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_one_as_acquire_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_one_as_acquire_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -20809,29 +20809,29 @@ define amdgpu_kernel void @global_system_one_as_release_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_one_as_release_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_one_as_release_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -21135,29 +21135,29 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_one_as_acq_rel_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_one_as_acq_rel_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -21461,29 +21461,29 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_one_as_seq_cst_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_one_as_seq_cst_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -21787,29 +21787,29 @@ define amdgpu_kernel void @global_system_one_as_monotonic_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_one_as_monotonic_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_one_as_monotonic_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -22109,27 +22109,27 @@ define amdgpu_kernel void @global_system_one_as_acquire_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_one_as_acquire_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_one_as_acquire_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -22433,29 +22433,29 @@ define amdgpu_kernel void @global_system_one_as_release_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_one_as_release_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_one_as_release_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -22759,29 +22759,29 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_one_as_acq_rel_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_one_as_acq_rel_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -23085,29 +23085,29 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_system_one_as_seq_cst_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_system_one_as_seq_cst_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_wb scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_inv scope:SCOPE_SYS
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-volatile.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-volatile.ll
index 10d9ee0617a0e..0d18963cbfb68 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-volatile.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-volatile.ll
@@ -8,7 +8,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250-CU %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250 %s
define amdgpu_kernel void @global_volatile_load_0(
; GFX6-LABEL: global_volatile_load_0:
@@ -148,18 +148,18 @@ define amdgpu_kernel void @global_volatile_load_0(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_volatile_load_0:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_volatile_load_0:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load volatile i32, ptr addrspace(1) %in, align 4
@@ -360,22 +360,22 @@ define amdgpu_kernel void @global_volatile_load_1(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_volatile_load_1:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, v0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_xcnt 0x0
-; GFX1250-CU-NEXT: s_mov_b32 s4, 0x3ff
-; GFX1250-CU-NEXT: v_and_b32_e64 v1, v1, s4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: global_load_b32 v1, v1, s[2:3] scale_offset scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-CU-NEXT: s_wait_samplecnt 0x0
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_volatile_load_1:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v1, v0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_mov_b32 s4, 0x3ff
+; GFX1250-NEXT: v_and_b32_e64 v1, v1, s4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_load_b32 v1, v1, s[2:3] scale_offset scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_bvhcnt 0x0
+; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -533,18 +533,18 @@ define amdgpu_kernel void @global_volatile_store_0(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_volatile_store_0:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[2:3], 0x0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_volatile_store_0:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[2:3], 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load i32, ptr addrspace(1) %in, align 4
@@ -738,20 +738,20 @@ define amdgpu_kernel void @global_volatile_store_1(
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_volatile_store_1:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[2:3], 0x0
-; GFX1250-CU-NEXT: s_wait_xcnt 0x0
-; GFX1250-CU-NEXT: s_mov_b32 s3, 0x3ff
-; GFX1250-CU-NEXT: v_and_b32_e64 v0, v0, s3
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1] scale_offset scope:SCOPE_SYS
-; GFX1250-CU-NEXT: s_wait_storecnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_volatile_store_1:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[2:3], 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_mov_b32 s3, 0x3ff
+; GFX1250-NEXT: v_and_b32_e64 v0, v0, s3
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] scale_offset scope:SCOPE_SYS
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -898,16 +898,16 @@ define amdgpu_kernel void @global_volatile_workgroup_acquire_load(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_volatile_workgroup_acquire_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_volatile_workgroup_acquire_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic volatile i32, ptr addrspace(1) %in syncscope("workgroup") acquire, align 4
@@ -1040,16 +1040,16 @@ define amdgpu_kernel void @global_volatile_workgroup_release_store(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_volatile_workgroup_release_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_volatile_workgroup_release_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic volatile i32 %in, ptr addrspace(1) %out syncscope("workgroup") release, align 4
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-wavefront.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-wavefront.ll
index f64b283edf43f..1aa8305b1a837 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-wavefront.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-wavefront.ll
@@ -12,7 +12,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250-CU %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250 %s
define amdgpu_kernel void @global_wavefront_unordered_load(
; GFX6-LABEL: global_wavefront_unordered_load:
@@ -192,16 +192,16 @@ define amdgpu_kernel void @global_wavefront_unordered_load(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_unordered_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_unordered_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in syncscope("wavefront") unordered, align 4
@@ -387,16 +387,16 @@ define amdgpu_kernel void @global_wavefront_monotonic_load(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_monotonic_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_monotonic_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in syncscope("wavefront") monotonic, align 4
@@ -582,16 +582,16 @@ define amdgpu_kernel void @global_wavefront_acquire_load(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_acquire_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_acquire_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in syncscope("wavefront") acquire, align 4
@@ -777,16 +777,16 @@ define amdgpu_kernel void @global_wavefront_seq_cst_load(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_seq_cst_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_seq_cst_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in syncscope("wavefront") seq_cst, align 4
@@ -945,15 +945,15 @@ define amdgpu_kernel void @global_wavefront_unordered_store(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_unordered_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_unordered_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out syncscope("wavefront") unordered, align 4
@@ -1111,15 +1111,15 @@ define amdgpu_kernel void @global_wavefront_monotonic_store(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_monotonic_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_monotonic_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out syncscope("wavefront") monotonic, align 4
@@ -1277,15 +1277,15 @@ define amdgpu_kernel void @global_wavefront_release_store(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_release_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_release_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out syncscope("wavefront") release, align 4
@@ -1443,15 +1443,15 @@ define amdgpu_kernel void @global_wavefront_seq_cst_store(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_seq_cst_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_seq_cst_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out syncscope("wavefront") seq_cst, align 4
@@ -1607,15 +1607,15 @@ define amdgpu_kernel void @global_wavefront_monotonic_atomicrmw(
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_monotonic_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_monotonic_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("wavefront") monotonic
@@ -1771,15 +1771,15 @@ define amdgpu_kernel void @global_wavefront_acquire_atomicrmw(
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_acquire_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_acquire_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("wavefront") acquire
@@ -1935,15 +1935,15 @@ define amdgpu_kernel void @global_wavefront_release_atomicrmw(
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_release_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_release_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("wavefront") release
@@ -2099,15 +2099,15 @@ define amdgpu_kernel void @global_wavefront_acq_rel_atomicrmw(
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_acq_rel_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_acq_rel_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("wavefront") acq_rel
@@ -2263,15 +2263,15 @@ define amdgpu_kernel void @global_wavefront_seq_cst_atomicrmw(
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_seq_cst_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_seq_cst_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("wavefront") seq_cst
@@ -2455,17 +2455,17 @@ define amdgpu_kernel void @global_wavefront_acquire_ret_atomicrmw(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_acquire_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_acquire_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("wavefront") acquire
@@ -2650,17 +2650,17 @@ define amdgpu_kernel void @global_wavefront_acq_rel_ret_atomicrmw(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_acq_rel_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_acq_rel_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("wavefront") acq_rel
@@ -2845,17 +2845,17 @@ define amdgpu_kernel void @global_wavefront_seq_cst_ret_atomicrmw(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_seq_cst_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_seq_cst_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("wavefront") seq_cst
@@ -3076,19 +3076,19 @@ define amdgpu_kernel void @global_wavefront_monotonic_monotonic_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_monotonic_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_monotonic_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -3309,19 +3309,19 @@ define amdgpu_kernel void @global_wavefront_acquire_monotonic_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_acquire_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_acquire_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -3542,19 +3542,19 @@ define amdgpu_kernel void @global_wavefront_release_monotonic_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_release_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_release_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -3775,19 +3775,19 @@ define amdgpu_kernel void @global_wavefront_acq_rel_monotonic_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_acq_rel_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_acq_rel_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -4008,19 +4008,19 @@ define amdgpu_kernel void @global_wavefront_seq_cst_monotonic_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_seq_cst_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_seq_cst_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -4241,19 +4241,19 @@ define amdgpu_kernel void @global_wavefront_monotonic_acquire_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_monotonic_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_monotonic_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -4474,19 +4474,19 @@ define amdgpu_kernel void @global_wavefront_acquire_acquire_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_acquire_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_acquire_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -4707,19 +4707,19 @@ define amdgpu_kernel void @global_wavefront_release_acquire_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_release_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_release_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -4940,19 +4940,19 @@ define amdgpu_kernel void @global_wavefront_acq_rel_acquire_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_acq_rel_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_acq_rel_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -5173,19 +5173,19 @@ define amdgpu_kernel void @global_wavefront_seq_cst_acquire_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_seq_cst_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_seq_cst_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -5406,19 +5406,19 @@ define amdgpu_kernel void @global_wavefront_monotonic_seq_cst_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_monotonic_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_monotonic_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -5639,19 +5639,19 @@ define amdgpu_kernel void @global_wavefront_acquire_seq_cst_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_acquire_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_acquire_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -5872,19 +5872,19 @@ define amdgpu_kernel void @global_wavefront_release_seq_cst_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_release_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_release_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -6105,19 +6105,19 @@ define amdgpu_kernel void @global_wavefront_acq_rel_seq_cst_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_acq_rel_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_acq_rel_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -6338,19 +6338,19 @@ define amdgpu_kernel void @global_wavefront_seq_cst_seq_cst_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_seq_cst_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_seq_cst_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -6601,21 +6601,21 @@ define amdgpu_kernel void @global_wavefront_monotonic_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_monotonic_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_monotonic_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -6868,21 +6868,21 @@ define amdgpu_kernel void @global_wavefront_acquire_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_acquire_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_acquire_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -7135,21 +7135,21 @@ define amdgpu_kernel void @global_wavefront_release_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_release_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_release_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -7402,21 +7402,21 @@ define amdgpu_kernel void @global_wavefront_acq_rel_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_acq_rel_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_acq_rel_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -7669,21 +7669,21 @@ define amdgpu_kernel void @global_wavefront_seq_cst_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_seq_cst_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_seq_cst_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -7936,21 +7936,21 @@ define amdgpu_kernel void @global_wavefront_monotonic_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_monotonic_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_monotonic_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -8203,21 +8203,21 @@ define amdgpu_kernel void @global_wavefront_acquire_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_acquire_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_acquire_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -8470,21 +8470,21 @@ define amdgpu_kernel void @global_wavefront_release_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_release_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_release_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -8737,21 +8737,21 @@ define amdgpu_kernel void @global_wavefront_acq_rel_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_acq_rel_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_acq_rel_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -9004,21 +9004,21 @@ define amdgpu_kernel void @global_wavefront_seq_cst_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_seq_cst_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_seq_cst_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -9271,21 +9271,21 @@ define amdgpu_kernel void @global_wavefront_monotonic_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_monotonic_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_monotonic_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -9538,21 +9538,21 @@ define amdgpu_kernel void @global_wavefront_acquire_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_acquire_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_acquire_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -9805,21 +9805,21 @@ define amdgpu_kernel void @global_wavefront_release_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_release_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_release_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -10072,21 +10072,21 @@ define amdgpu_kernel void @global_wavefront_acq_rel_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_acq_rel_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_acq_rel_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -10339,21 +10339,21 @@ define amdgpu_kernel void @global_wavefront_seq_cst_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_seq_cst_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_seq_cst_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -10541,16 +10541,16 @@ define amdgpu_kernel void @global_wavefront_one_as_unordered_load(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_one_as_unordered_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_one_as_unordered_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in syncscope("wavefront-one-as") unordered, align 4
@@ -10736,16 +10736,16 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_load(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_one_as_monotonic_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_one_as_monotonic_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in syncscope("wavefront-one-as") monotonic, align 4
@@ -10931,16 +10931,16 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_load(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_one_as_acquire_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_one_as_acquire_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in syncscope("wavefront-one-as") acquire, align 4
@@ -11126,16 +11126,16 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_load(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_one_as_seq_cst_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_one_as_seq_cst_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in syncscope("wavefront-one-as") seq_cst, align 4
@@ -11294,15 +11294,15 @@ define amdgpu_kernel void @global_wavefront_one_as_unordered_store(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_one_as_unordered_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_one_as_unordered_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out syncscope("wavefront-one-as") unordered, align 4
@@ -11460,15 +11460,15 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_store(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_one_as_monotonic_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_one_as_monotonic_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out syncscope("wavefront-one-as") monotonic, align 4
@@ -11626,15 +11626,15 @@ define amdgpu_kernel void @global_wavefront_one_as_release_store(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_one_as_release_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_one_as_release_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out syncscope("wavefront-one-as") release, align 4
@@ -11792,15 +11792,15 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_store(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_one_as_seq_cst_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_one_as_seq_cst_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out syncscope("wavefront-one-as") seq_cst, align 4
@@ -11956,15 +11956,15 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_atomicrmw(
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_one_as_monotonic_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_one_as_monotonic_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("wavefront-one-as") monotonic
@@ -12120,15 +12120,15 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_atomicrmw(
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_one_as_acquire_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_one_as_acquire_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("wavefront-one-as") acquire
@@ -12284,15 +12284,15 @@ define amdgpu_kernel void @global_wavefront_one_as_release_atomicrmw(
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_one_as_release_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_one_as_release_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("wavefront-one-as") release
@@ -12448,15 +12448,15 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_atomicrmw(
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_one_as_acq_rel_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_one_as_acq_rel_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("wavefront-one-as") acq_rel
@@ -12612,15 +12612,15 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_atomicrmw(
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_one_as_seq_cst_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_one_as_seq_cst_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("wavefront-one-as") seq_cst
@@ -12804,17 +12804,17 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_ret_atomicrmw(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_one_as_acquire_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_one_as_acquire_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("wavefront-one-as") acquire
@@ -12999,17 +12999,17 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_ret_atomicrmw(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_one_as_acq_rel_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_one_as_acq_rel_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("wavefront-one-as") acq_rel
@@ -13194,17 +13194,17 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_ret_atomicrmw(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_one_as_seq_cst_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_one_as_seq_cst_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("wavefront-one-as") seq_cst
@@ -13425,19 +13425,19 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_monotonic_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_one_as_monotonic_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_one_as_monotonic_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -13658,19 +13658,19 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_monotonic_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_one_as_acquire_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_one_as_acquire_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -13891,19 +13891,19 @@ define amdgpu_kernel void @global_wavefront_one_as_release_monotonic_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_one_as_release_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_one_as_release_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -14124,19 +14124,19 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_monotonic_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_one_as_acq_rel_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_one_as_acq_rel_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -14357,19 +14357,19 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_monotonic_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_one_as_seq_cst_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_one_as_seq_cst_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -14590,19 +14590,19 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_acquire_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_one_as_monotonic_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_one_as_monotonic_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -14823,19 +14823,19 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_acquire_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_one_as_acquire_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_one_as_acquire_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -15056,19 +15056,19 @@ define amdgpu_kernel void @global_wavefront_one_as_release_acquire_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_one_as_release_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_one_as_release_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -15289,19 +15289,19 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_acquire_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_one_as_acq_rel_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_one_as_acq_rel_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -15522,19 +15522,19 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_acquire_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_one_as_seq_cst_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_one_as_seq_cst_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -15755,19 +15755,19 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_seq_cst_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_one_as_monotonic_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_one_as_monotonic_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -15988,19 +15988,19 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_seq_cst_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_one_as_acquire_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_one_as_acquire_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -16221,19 +16221,19 @@ define amdgpu_kernel void @global_wavefront_one_as_release_seq_cst_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_one_as_release_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_one_as_release_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -16454,19 +16454,19 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_seq_cst_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_one_as_acq_rel_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_one_as_acq_rel_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -16687,19 +16687,19 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_seq_cst_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_one_as_seq_cst_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_one_as_seq_cst_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -16950,21 +16950,21 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_monotonic_ret_cmpxc
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_one_as_monotonic_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_one_as_monotonic_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -17217,21 +17217,21 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_monotonic_ret_cmpxchg
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_one_as_acquire_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_one_as_acquire_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -17484,21 +17484,21 @@ define amdgpu_kernel void @global_wavefront_one_as_release_monotonic_ret_cmpxchg
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_one_as_release_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_one_as_release_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -17751,21 +17751,21 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -18018,21 +18018,21 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -18285,21 +18285,21 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_acquire_ret_cmpxchg
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_one_as_monotonic_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_one_as_monotonic_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -18552,21 +18552,21 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_one_as_acquire_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_one_as_acquire_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -18819,21 +18819,21 @@ define amdgpu_kernel void @global_wavefront_one_as_release_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_one_as_release_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_one_as_release_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -19086,21 +19086,21 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_one_as_acq_rel_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_one_as_acq_rel_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -19353,21 +19353,21 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_one_as_seq_cst_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_one_as_seq_cst_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -19620,21 +19620,21 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -19887,21 +19887,21 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_one_as_acquire_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_one_as_acquire_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -20154,21 +20154,21 @@ define amdgpu_kernel void @global_wavefront_one_as_release_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_one_as_release_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_one_as_release_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -20421,21 +20421,21 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_one_as_acq_rel_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_one_as_acq_rel_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -20688,21 +20688,21 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-workgroup.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-workgroup.ll
index c1879c8eb11af..3eab16e6b9713 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-workgroup.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-workgroup.ll
@@ -12,7 +12,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250-CU %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250 %s
define amdgpu_kernel void @global_workgroup_unordered_load(
; GFX6-LABEL: global_workgroup_unordered_load:
@@ -192,16 +192,16 @@ define amdgpu_kernel void @global_workgroup_unordered_load(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_unordered_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_unordered_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in syncscope("workgroup") unordered, align 4
@@ -387,16 +387,16 @@ define amdgpu_kernel void @global_workgroup_monotonic_load(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_monotonic_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_monotonic_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in syncscope("workgroup") monotonic, align 4
@@ -587,16 +587,16 @@ define amdgpu_kernel void @global_workgroup_acquire_load(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_acquire_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_acquire_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in syncscope("workgroup") acquire, align 4
@@ -799,17 +799,17 @@ define amdgpu_kernel void @global_workgroup_seq_cst_load(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_seq_cst_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_seq_cst_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in syncscope("workgroup") seq_cst, align 4
@@ -968,15 +968,15 @@ define amdgpu_kernel void @global_workgroup_unordered_store(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_unordered_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_unordered_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out syncscope("workgroup") unordered, align 4
@@ -1134,15 +1134,15 @@ define amdgpu_kernel void @global_workgroup_monotonic_store(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_monotonic_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_monotonic_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out syncscope("workgroup") monotonic, align 4
@@ -1318,16 +1318,16 @@ define amdgpu_kernel void @global_workgroup_release_store(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_release_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_release_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out syncscope("workgroup") release, align 4
@@ -1503,16 +1503,16 @@ define amdgpu_kernel void @global_workgroup_seq_cst_store(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_seq_cst_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_seq_cst_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out syncscope("workgroup") seq_cst, align 4
@@ -1668,15 +1668,15 @@ define amdgpu_kernel void @global_workgroup_monotonic_atomicrmw(
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_monotonic_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_monotonic_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("workgroup") monotonic
@@ -1842,15 +1842,15 @@ define amdgpu_kernel void @global_workgroup_acquire_atomicrmw(
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_acquire_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_acquire_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("workgroup") acquire
@@ -2024,16 +2024,16 @@ define amdgpu_kernel void @global_workgroup_release_atomicrmw(
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_release_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_release_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("workgroup") release
@@ -2217,16 +2217,16 @@ define amdgpu_kernel void @global_workgroup_acq_rel_atomicrmw(
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_acq_rel_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_acq_rel_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("workgroup") acq_rel
@@ -2410,16 +2410,16 @@ define amdgpu_kernel void @global_workgroup_seq_cst_atomicrmw(
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_seq_cst_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_seq_cst_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("workgroup") seq_cst
@@ -2608,17 +2608,17 @@ define amdgpu_kernel void @global_workgroup_acquire_ret_atomicrmw(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_acquire_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_acquire_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("workgroup") acquire
@@ -2828,18 +2828,18 @@ define amdgpu_kernel void @global_workgroup_acq_rel_ret_atomicrmw(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_acq_rel_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_acq_rel_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("workgroup") acq_rel
@@ -3049,18 +3049,18 @@ define amdgpu_kernel void @global_workgroup_seq_cst_ret_atomicrmw(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_seq_cst_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_seq_cst_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("workgroup") seq_cst
@@ -3281,19 +3281,19 @@ define amdgpu_kernel void @global_workgroup_monotonic_monotonic_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_monotonic_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_monotonic_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -3524,19 +3524,19 @@ define amdgpu_kernel void @global_workgroup_acquire_monotonic_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_acquire_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_acquire_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -3775,20 +3775,20 @@ define amdgpu_kernel void @global_workgroup_release_monotonic_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_release_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_release_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -4037,20 +4037,20 @@ define amdgpu_kernel void @global_workgroup_acq_rel_monotonic_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_acq_rel_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_acq_rel_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -4299,20 +4299,20 @@ define amdgpu_kernel void @global_workgroup_seq_cst_monotonic_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_seq_cst_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_seq_cst_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -4543,19 +4543,19 @@ define amdgpu_kernel void @global_workgroup_monotonic_acquire_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_monotonic_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_monotonic_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -4786,19 +4786,19 @@ define amdgpu_kernel void @global_workgroup_acquire_acquire_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_acquire_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_acquire_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -5047,20 +5047,20 @@ define amdgpu_kernel void @global_workgroup_release_acquire_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_release_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_release_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -5309,20 +5309,20 @@ define amdgpu_kernel void @global_workgroup_acq_rel_acquire_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_acq_rel_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_acq_rel_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -5571,20 +5571,20 @@ define amdgpu_kernel void @global_workgroup_seq_cst_acquire_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_seq_cst_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_seq_cst_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -5833,20 +5833,20 @@ define amdgpu_kernel void @global_workgroup_monotonic_seq_cst_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_monotonic_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_monotonic_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -6095,20 +6095,20 @@ define amdgpu_kernel void @global_workgroup_acquire_seq_cst_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_acquire_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_acquire_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -6357,20 +6357,20 @@ define amdgpu_kernel void @global_workgroup_release_seq_cst_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_release_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_release_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -6619,20 +6619,20 @@ define amdgpu_kernel void @global_workgroup_acq_rel_seq_cst_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_acq_rel_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_acq_rel_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -6881,20 +6881,20 @@ define amdgpu_kernel void @global_workgroup_seq_cst_seq_cst_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_seq_cst_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_seq_cst_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -7145,21 +7145,21 @@ define amdgpu_kernel void @global_workgroup_monotonic_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_monotonic_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_monotonic_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -7417,21 +7417,21 @@ define amdgpu_kernel void @global_workgroup_acquire_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_acquire_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_acquire_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -7702,22 +7702,22 @@ define amdgpu_kernel void @global_workgroup_release_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_release_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_release_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -7995,22 +7995,22 @@ define amdgpu_kernel void @global_workgroup_acq_rel_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_acq_rel_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_acq_rel_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -8288,22 +8288,22 @@ define amdgpu_kernel void @global_workgroup_seq_cst_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_seq_cst_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_seq_cst_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -8563,21 +8563,21 @@ define amdgpu_kernel void @global_workgroup_monotonic_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_monotonic_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_monotonic_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -8835,21 +8835,21 @@ define amdgpu_kernel void @global_workgroup_acquire_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_acquire_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_acquire_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -9127,22 +9127,22 @@ define amdgpu_kernel void @global_workgroup_release_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_release_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_release_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -9420,22 +9420,22 @@ define amdgpu_kernel void @global_workgroup_acq_rel_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_acq_rel_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_acq_rel_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -9713,22 +9713,22 @@ define amdgpu_kernel void @global_workgroup_seq_cst_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_seq_cst_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_seq_cst_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -10006,22 +10006,22 @@ define amdgpu_kernel void @global_workgroup_monotonic_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_monotonic_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_monotonic_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -10297,22 +10297,22 @@ define amdgpu_kernel void @global_workgroup_acquire_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_acquire_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_acquire_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -10590,22 +10590,22 @@ define amdgpu_kernel void @global_workgroup_release_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_release_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_release_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -10883,22 +10883,22 @@ define amdgpu_kernel void @global_workgroup_acq_rel_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_acq_rel_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_acq_rel_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -11176,22 +11176,22 @@ define amdgpu_kernel void @global_workgroup_seq_cst_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_seq_cst_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_seq_cst_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -11379,16 +11379,16 @@ define amdgpu_kernel void @global_workgroup_one_as_unordered_load(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_one_as_unordered_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_one_as_unordered_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in syncscope("workgroup-one-as") unordered, align 4
@@ -11574,16 +11574,16 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_load(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_one_as_monotonic_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_one_as_monotonic_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in syncscope("workgroup-one-as") monotonic, align 4
@@ -11774,16 +11774,16 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_load(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_one_as_acquire_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_one_as_acquire_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in syncscope("workgroup-one-as") acquire, align 4
@@ -11982,16 +11982,16 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_load(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_one_as_seq_cst_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_one_as_seq_cst_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic i32, ptr addrspace(1) %in syncscope("workgroup-one-as") seq_cst, align 4
@@ -12150,15 +12150,15 @@ define amdgpu_kernel void @global_workgroup_one_as_unordered_store(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_one_as_unordered_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_one_as_unordered_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out syncscope("workgroup-one-as") unordered, align 4
@@ -12316,15 +12316,15 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_store(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_one_as_monotonic_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_one_as_monotonic_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out syncscope("workgroup-one-as") monotonic, align 4
@@ -12492,15 +12492,15 @@ define amdgpu_kernel void @global_workgroup_one_as_release_store(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_one_as_release_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_one_as_release_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out syncscope("workgroup-one-as") release, align 4
@@ -12668,15 +12668,15 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_store(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_one_as_seq_cst_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_one_as_seq_cst_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic i32 %in, ptr addrspace(1) %out syncscope("workgroup-one-as") seq_cst, align 4
@@ -12832,15 +12832,15 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_atomicrmw(
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_one_as_monotonic_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_one_as_monotonic_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("workgroup-one-as") monotonic
@@ -13006,15 +13006,15 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_atomicrmw(
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_one_as_acquire_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_one_as_acquire_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("workgroup-one-as") acquire
@@ -13180,15 +13180,15 @@ define amdgpu_kernel void @global_workgroup_one_as_release_atomicrmw(
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_one_as_release_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_one_as_release_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("workgroup-one-as") release
@@ -13364,15 +13364,15 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_atomicrmw(
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_one_as_acq_rel_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_one_as_acq_rel_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("workgroup-one-as") acq_rel
@@ -13548,15 +13548,15 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_atomicrmw(
; GFX12-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_one_as_seq_cst_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_one_as_seq_cst_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("workgroup-one-as") seq_cst
@@ -13745,17 +13745,17 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_ret_atomicrmw(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_one_as_acquire_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_one_as_acquire_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("workgroup-one-as") acquire
@@ -13957,17 +13957,17 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_ret_atomicrmw(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_one_as_acq_rel_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_one_as_acq_rel_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("workgroup-one-as") acq_rel
@@ -14169,17 +14169,17 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_ret_atomicrmw(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_one_as_seq_cst_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_one_as_seq_cst_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("workgroup-one-as") seq_cst
@@ -14400,19 +14400,19 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_monotonic_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_one_as_monotonic_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_one_as_monotonic_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -14643,19 +14643,19 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_monotonic_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_one_as_acquire_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_one_as_acquire_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -14886,19 +14886,19 @@ define amdgpu_kernel void @global_workgroup_one_as_release_monotonic_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_one_as_release_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_one_as_release_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -15139,19 +15139,19 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_monotonic_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_one_as_acq_rel_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_one_as_acq_rel_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -15392,19 +15392,19 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_monotonic_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_one_as_seq_cst_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_one_as_seq_cst_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -15635,19 +15635,19 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_acquire_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_one_as_monotonic_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_one_as_monotonic_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -15878,19 +15878,19 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_acquire_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_one_as_acquire_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_one_as_acquire_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -16131,19 +16131,19 @@ define amdgpu_kernel void @global_workgroup_one_as_release_acquire_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_one_as_release_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_one_as_release_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -16384,19 +16384,19 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_acquire_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_one_as_acq_rel_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_one_as_acq_rel_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -16637,19 +16637,19 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_acquire_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_one_as_seq_cst_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_one_as_seq_cst_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -16890,19 +16890,19 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_seq_cst_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_one_as_monotonic_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_one_as_monotonic_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -17143,19 +17143,19 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_seq_cst_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_one_as_acquire_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_one_as_acquire_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -17396,19 +17396,19 @@ define amdgpu_kernel void @global_workgroup_one_as_release_seq_cst_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_one_as_release_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_one_as_release_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -17649,19 +17649,19 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_seq_cst_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_one_as_acq_rel_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_one_as_acq_rel_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -17902,19 +17902,19 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_seq_cst_cmpxchg(
; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_one_as_seq_cst_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_one_as_seq_cst_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -18165,21 +18165,21 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_monotonic_ret_cmpxc
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_one_as_monotonic_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_one_as_monotonic_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -18437,21 +18437,21 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_monotonic_ret_cmpxchg
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_one_as_acquire_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_one_as_acquire_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -18714,21 +18714,21 @@ define amdgpu_kernel void @global_workgroup_one_as_release_monotonic_ret_cmpxchg
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_one_as_release_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_one_as_release_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -18998,21 +18998,21 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -19282,21 +19282,21 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -19556,21 +19556,21 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_acquire_ret_cmpxchg
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_one_as_monotonic_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_one_as_monotonic_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -19828,21 +19828,21 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_one_as_acquire_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_one_as_acquire_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -20112,21 +20112,21 @@ define amdgpu_kernel void @global_workgroup_one_as_release_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_one_as_release_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_one_as_release_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -20396,21 +20396,21 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_one_as_acq_rel_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_one_as_acq_rel_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -20680,21 +20680,21 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_one_as_seq_cst_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_one_as_seq_cst_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -20964,21 +20964,21 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_seq_cst_ret_cmpxchg
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_one_as_monotonic_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_one_as_monotonic_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -21246,21 +21246,21 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_one_as_acquire_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_one_as_acquire_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -21530,21 +21530,21 @@ define amdgpu_kernel void @global_workgroup_one_as_release_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_one_as_release_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_one_as_release_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -21814,21 +21814,21 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_one_as_acq_rel_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_one_as_acq_rel_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
@@ -22098,21 +22098,21 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: global_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0xc
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s3
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX1250-CU-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
-; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: global_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x8
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0xc
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(1) %out, i32 4
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-agent.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-agent.ll
index fe703f5e8c90f..102616b9a2065 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-agent.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-agent.ll
@@ -12,7 +12,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250-CU %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250 %s
define amdgpu_kernel void @local_agent_unordered_load(
; GFX6-LABEL: local_agent_unordered_load:
@@ -179,17 +179,17 @@ define amdgpu_kernel void @local_agent_unordered_load(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_unordered_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: ds_load_b32 v1, v0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_unordered_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: ds_load_b32 v1, v0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
%val = load atomic i32, ptr addrspace(3) %in syncscope("agent") unordered, align 4
@@ -362,17 +362,17 @@ define amdgpu_kernel void @local_agent_monotonic_load(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_monotonic_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: ds_load_b32 v1, v0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_monotonic_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: ds_load_b32 v1, v0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
%val = load atomic i32, ptr addrspace(3) %in syncscope("agent") monotonic, align 4
@@ -550,17 +550,17 @@ define amdgpu_kernel void @local_agent_acquire_load(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_acquire_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: ds_load_b32 v1, v0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_acquire_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: ds_load_b32 v1, v0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
%val = load atomic i32, ptr addrspace(3) %in syncscope("agent") acquire, align 4
@@ -756,18 +756,18 @@ define amdgpu_kernel void @local_agent_seq_cst_load(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_seq_cst_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_load_b32 v1, v0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_seq_cst_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_load_b32 v1, v0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
%val = load atomic i32, ptr addrspace(3) %in syncscope("agent") seq_cst, align 4
@@ -910,15 +910,15 @@ define amdgpu_kernel void @local_agent_unordered_store(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_unordered_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_unordered_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic i32 %in, ptr addrspace(3) %out syncscope("agent") unordered, align 4
@@ -1060,15 +1060,15 @@ define amdgpu_kernel void @local_agent_monotonic_store(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_monotonic_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_monotonic_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic i32 %in, ptr addrspace(3) %out syncscope("agent") monotonic, align 4
@@ -1228,16 +1228,16 @@ define amdgpu_kernel void @local_agent_release_store(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_release_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_release_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic i32 %in, ptr addrspace(3) %out syncscope("agent") release, align 4
@@ -1397,16 +1397,16 @@ define amdgpu_kernel void @local_agent_seq_cst_store(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_seq_cst_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_seq_cst_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic i32 %in, ptr addrspace(3) %out syncscope("agent") seq_cst, align 4
@@ -1548,15 +1548,15 @@ define amdgpu_kernel void @local_agent_monotonic_atomicrmw(
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_monotonic_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_monotonic_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("agent") monotonic
@@ -1714,16 +1714,16 @@ define amdgpu_kernel void @local_agent_acquire_atomicrmw(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_acquire_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_acquire_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("agent") acquire
@@ -1883,16 +1883,16 @@ define amdgpu_kernel void @local_agent_release_atomicrmw(
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_release_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_release_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("agent") release
@@ -2068,17 +2068,17 @@ define amdgpu_kernel void @local_agent_acq_rel_atomicrmw(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_acq_rel_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_acq_rel_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("agent") acq_rel
@@ -2254,17 +2254,17 @@ define amdgpu_kernel void @local_agent_seq_cst_atomicrmw(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_seq_cst_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_seq_cst_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("agent") seq_cst
@@ -2453,18 +2453,18 @@ define amdgpu_kernel void @local_agent_acquire_ret_atomicrmw(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_acquire_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_acquire_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("agent") acquire
@@ -2672,19 +2672,19 @@ define amdgpu_kernel void @local_agent_acq_rel_ret_atomicrmw(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_acq_rel_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_acq_rel_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("agent") acq_rel
@@ -2892,19 +2892,19 @@ define amdgpu_kernel void @local_agent_seq_cst_ret_atomicrmw(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_seq_cst_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_seq_cst_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("agent") seq_cst
@@ -3073,17 +3073,17 @@ define amdgpu_kernel void @local_agent_monotonic_monotonic_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_monotonic_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_monotonic_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -3268,18 +3268,18 @@ define amdgpu_kernel void @local_agent_acquire_monotonic_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_acquire_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_acquire_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -3466,18 +3466,18 @@ define amdgpu_kernel void @local_agent_release_monotonic_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_release_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_release_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -3680,19 +3680,19 @@ define amdgpu_kernel void @local_agent_acq_rel_monotonic_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_acq_rel_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_acq_rel_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -3895,19 +3895,19 @@ define amdgpu_kernel void @local_agent_seq_cst_monotonic_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_seq_cst_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_seq_cst_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -4092,18 +4092,18 @@ define amdgpu_kernel void @local_agent_monotonic_acquire_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_monotonic_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_monotonic_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -4288,18 +4288,18 @@ define amdgpu_kernel void @local_agent_acquire_acquire_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_acquire_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_acquire_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -4502,19 +4502,19 @@ define amdgpu_kernel void @local_agent_release_acquire_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_release_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_release_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -4717,19 +4717,19 @@ define amdgpu_kernel void @local_agent_acq_rel_acquire_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_acq_rel_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_acq_rel_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -4932,19 +4932,19 @@ define amdgpu_kernel void @local_agent_seq_cst_acquire_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_seq_cst_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_seq_cst_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -5147,19 +5147,19 @@ define amdgpu_kernel void @local_agent_monotonic_seq_cst_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_monotonic_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_monotonic_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -5362,19 +5362,19 @@ define amdgpu_kernel void @local_agent_acquire_seq_cst_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_acquire_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_acquire_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -5577,19 +5577,19 @@ define amdgpu_kernel void @local_agent_release_seq_cst_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_release_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_release_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -5792,19 +5792,19 @@ define amdgpu_kernel void @local_agent_acq_rel_seq_cst_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_acq_rel_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_acq_rel_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -6007,19 +6007,19 @@ define amdgpu_kernel void @local_agent_seq_cst_seq_cst_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_seq_cst_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_seq_cst_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -6230,20 +6230,20 @@ define amdgpu_kernel void @local_agent_monotonic_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_monotonic_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_monotonic_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -6461,20 +6461,20 @@ define amdgpu_kernel void @local_agent_acquire_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_acquire_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_acquire_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -6705,21 +6705,21 @@ define amdgpu_kernel void @local_agent_release_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_release_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_release_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -6955,21 +6955,21 @@ define amdgpu_kernel void @local_agent_acq_rel_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_acq_rel_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_acq_rel_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -7205,21 +7205,21 @@ define amdgpu_kernel void @local_agent_seq_cst_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_seq_cst_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_seq_cst_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -7437,20 +7437,20 @@ define amdgpu_kernel void @local_agent_monotonic_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_monotonic_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_monotonic_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -7668,20 +7668,20 @@ define amdgpu_kernel void @local_agent_acquire_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_acquire_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_acquire_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -7917,21 +7917,21 @@ define amdgpu_kernel void @local_agent_release_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_release_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_release_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -8167,21 +8167,21 @@ define amdgpu_kernel void @local_agent_acq_rel_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_acq_rel_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_acq_rel_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -8417,21 +8417,21 @@ define amdgpu_kernel void @local_agent_seq_cst_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_seq_cst_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_seq_cst_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -8667,21 +8667,21 @@ define amdgpu_kernel void @local_agent_monotonic_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_monotonic_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_monotonic_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -8917,21 +8917,21 @@ define amdgpu_kernel void @local_agent_acquire_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_acquire_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_acquire_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -9167,21 +9167,21 @@ define amdgpu_kernel void @local_agent_release_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_release_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_release_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -9417,21 +9417,21 @@ define amdgpu_kernel void @local_agent_acq_rel_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_acq_rel_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_acq_rel_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -9667,21 +9667,21 @@ define amdgpu_kernel void @local_agent_seq_cst_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_seq_cst_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_seq_cst_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -9856,17 +9856,17 @@ define amdgpu_kernel void @local_agent_one_as_unordered_load(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_one_as_unordered_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: ds_load_b32 v1, v0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_one_as_unordered_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: ds_load_b32 v1, v0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
%val = load atomic i32, ptr addrspace(3) %in syncscope("agent-one-as") unordered, align 4
@@ -10039,17 +10039,17 @@ define amdgpu_kernel void @local_agent_one_as_monotonic_load(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_one_as_monotonic_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: ds_load_b32 v1, v0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_one_as_monotonic_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: ds_load_b32 v1, v0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
%val = load atomic i32, ptr addrspace(3) %in syncscope("agent-one-as") monotonic, align 4
@@ -10222,17 +10222,17 @@ define amdgpu_kernel void @local_agent_one_as_acquire_load(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_one_as_acquire_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: ds_load_b32 v1, v0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_one_as_acquire_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: ds_load_b32 v1, v0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
%val = load atomic i32, ptr addrspace(3) %in syncscope("agent-one-as") acquire, align 4
@@ -10405,17 +10405,17 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_load(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_one_as_seq_cst_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: ds_load_b32 v1, v0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_one_as_seq_cst_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: ds_load_b32 v1, v0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
%val = load atomic i32, ptr addrspace(3) %in syncscope("agent-one-as") seq_cst, align 4
@@ -10558,15 +10558,15 @@ define amdgpu_kernel void @local_agent_one_as_unordered_store(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_one_as_unordered_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_one_as_unordered_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic i32 %in, ptr addrspace(3) %out syncscope("agent-one-as") unordered, align 4
@@ -10708,15 +10708,15 @@ define amdgpu_kernel void @local_agent_one_as_monotonic_store(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_one_as_monotonic_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_one_as_monotonic_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic i32 %in, ptr addrspace(3) %out syncscope("agent-one-as") monotonic, align 4
@@ -10858,15 +10858,15 @@ define amdgpu_kernel void @local_agent_one_as_release_store(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_one_as_release_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_one_as_release_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic i32 %in, ptr addrspace(3) %out syncscope("agent-one-as") release, align 4
@@ -11008,15 +11008,15 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_store(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_one_as_seq_cst_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_one_as_seq_cst_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic i32 %in, ptr addrspace(3) %out syncscope("agent-one-as") seq_cst, align 4
@@ -11158,15 +11158,15 @@ define amdgpu_kernel void @local_agent_one_as_monotonic_atomicrmw(
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_one_as_monotonic_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_one_as_monotonic_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("agent-one-as") monotonic
@@ -11308,15 +11308,15 @@ define amdgpu_kernel void @local_agent_one_as_acquire_atomicrmw(
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_one_as_acquire_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_one_as_acquire_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("agent-one-as") acquire
@@ -11458,15 +11458,15 @@ define amdgpu_kernel void @local_agent_one_as_release_atomicrmw(
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_one_as_release_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_one_as_release_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("agent-one-as") release
@@ -11608,15 +11608,15 @@ define amdgpu_kernel void @local_agent_one_as_acq_rel_atomicrmw(
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_one_as_acq_rel_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_one_as_acq_rel_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("agent-one-as") acq_rel
@@ -11758,15 +11758,15 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_atomicrmw(
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_one_as_seq_cst_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_one_as_seq_cst_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("agent-one-as") seq_cst
@@ -11950,18 +11950,18 @@ define amdgpu_kernel void @local_agent_one_as_acquire_ret_atomicrmw(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_one_as_acquire_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_one_as_acquire_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("agent-one-as") acquire
@@ -12146,18 +12146,18 @@ define amdgpu_kernel void @local_agent_one_as_acq_rel_ret_atomicrmw(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_one_as_acq_rel_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_one_as_acq_rel_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("agent-one-as") acq_rel
@@ -12342,18 +12342,18 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_ret_atomicrmw(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_one_as_seq_cst_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_one_as_seq_cst_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("agent-one-as") seq_cst
@@ -12522,17 +12522,17 @@ define amdgpu_kernel void @local_agent_one_as_monotonic_monotonic_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_one_as_monotonic_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_one_as_monotonic_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -12701,17 +12701,17 @@ define amdgpu_kernel void @local_agent_one_as_acquire_monotonic_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_one_as_acquire_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_one_as_acquire_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -12880,17 +12880,17 @@ define amdgpu_kernel void @local_agent_one_as_release_monotonic_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_one_as_release_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_one_as_release_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -13059,17 +13059,17 @@ define amdgpu_kernel void @local_agent_one_as_acq_rel_monotonic_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_one_as_acq_rel_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_one_as_acq_rel_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -13238,17 +13238,17 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_monotonic_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_one_as_seq_cst_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_one_as_seq_cst_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -13417,17 +13417,17 @@ define amdgpu_kernel void @local_agent_one_as_monotonic_acquire_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_one_as_monotonic_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_one_as_monotonic_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -13596,17 +13596,17 @@ define amdgpu_kernel void @local_agent_one_as_acquire_acquire_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_one_as_acquire_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_one_as_acquire_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -13775,17 +13775,17 @@ define amdgpu_kernel void @local_agent_one_as_release_acquire_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_one_as_release_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_one_as_release_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -13954,17 +13954,17 @@ define amdgpu_kernel void @local_agent_one_as_acq_rel_acquire_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_one_as_acq_rel_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_one_as_acq_rel_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -14133,17 +14133,17 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_acquire_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_one_as_seq_cst_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_one_as_seq_cst_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -14312,17 +14312,17 @@ define amdgpu_kernel void @local_agent_one_as_monotonic_seq_cst_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_one_as_monotonic_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_one_as_monotonic_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -14491,17 +14491,17 @@ define amdgpu_kernel void @local_agent_one_as_acquire_seq_cst_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_one_as_acquire_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_one_as_acquire_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -14670,17 +14670,17 @@ define amdgpu_kernel void @local_agent_one_as_release_seq_cst_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_one_as_release_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_one_as_release_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -14849,17 +14849,17 @@ define amdgpu_kernel void @local_agent_one_as_acq_rel_seq_cst_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_one_as_acq_rel_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_one_as_acq_rel_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -15028,17 +15028,17 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_seq_cst_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_one_as_seq_cst_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_one_as_seq_cst_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -15249,20 +15249,20 @@ define amdgpu_kernel void @local_agent_one_as_monotonic_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_one_as_monotonic_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_one_as_monotonic_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -15475,20 +15475,20 @@ define amdgpu_kernel void @local_agent_one_as_acquire_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_one_as_acquire_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_one_as_acquire_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -15701,20 +15701,20 @@ define amdgpu_kernel void @local_agent_one_as_release_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_one_as_release_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_one_as_release_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -15927,20 +15927,20 @@ define amdgpu_kernel void @local_agent_one_as_acq_rel_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_one_as_acq_rel_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_one_as_acq_rel_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -16153,20 +16153,20 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_one_as_seq_cst_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_one_as_seq_cst_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -16379,20 +16379,20 @@ define amdgpu_kernel void @local_agent_one_as_monotonic_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_one_as_monotonic_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_one_as_monotonic_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -16605,20 +16605,20 @@ define amdgpu_kernel void @local_agent_one_as_acquire_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_one_as_acquire_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_one_as_acquire_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -16831,20 +16831,20 @@ define amdgpu_kernel void @local_agent_one_as_release_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_one_as_release_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_one_as_release_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -17057,20 +17057,20 @@ define amdgpu_kernel void @local_agent_one_as_acq_rel_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_one_as_acq_rel_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_one_as_acq_rel_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -17283,20 +17283,20 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_one_as_seq_cst_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_one_as_seq_cst_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -17509,20 +17509,20 @@ define amdgpu_kernel void @local_agent_one_as_monotonic_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_one_as_monotonic_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_one_as_monotonic_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -17735,20 +17735,20 @@ define amdgpu_kernel void @local_agent_one_as_acquire_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_one_as_acquire_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_one_as_acquire_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -17961,20 +17961,20 @@ define amdgpu_kernel void @local_agent_one_as_release_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_one_as_release_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_one_as_release_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -18187,20 +18187,20 @@ define amdgpu_kernel void @local_agent_one_as_acq_rel_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_one_as_acq_rel_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_one_as_acq_rel_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -18413,20 +18413,20 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_agent_one_as_seq_cst_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_agent_one_as_seq_cst_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-nontemporal.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-nontemporal.ll
index 689932469d78d..c6f7ce51f5ea2 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-nontemporal.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-nontemporal.ll
@@ -12,7 +12,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250-CU %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250 %s
define amdgpu_kernel void @local_nontemporal_load_0(
; GFX6-LABEL: local_nontemporal_load_0:
@@ -195,17 +195,17 @@ define amdgpu_kernel void @local_nontemporal_load_0(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_nontemporal_load_0:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ds_load_b32 v1, v1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_nontemporal_load_0:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ds_load_b32 v1, v1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(1) %out) {
entry:
%val = load i32, ptr addrspace(3) %in, align 4, !nontemporal !0
@@ -442,21 +442,21 @@ define amdgpu_kernel void @local_nontemporal_load_1(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_nontemporal_load_1:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, v0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_mov_b32 s2, 0x3ff
-; GFX1250-CU-NEXT: v_and_b32_e64 v1, v1, s2
-; GFX1250-CU-NEXT: s_mov_b32 s2, 2
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_lshl_add_u32 v1, v1, s2, s3
-; GFX1250-CU-NEXT: ds_load_b32 v1, v1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_nontemporal_load_1:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v1, v0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_mov_b32 s2, 0x3ff
+; GFX1250-NEXT: v_and_b32_e64 v1, v1, s2
+; GFX1250-NEXT: s_mov_b32 s2, 2
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_lshl_add_u32 v1, v1, s2, s3
+; GFX1250-NEXT: ds_load_b32 v1, v1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(1) %out) {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -627,17 +627,17 @@ define amdgpu_kernel void @local_nontemporal_store_0(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_nontemporal_store_0:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[2:3], 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_nontemporal_store_0:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[2:3], 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(3) %out) {
entry:
%val = load i32, ptr addrspace(1) %in, align 4
@@ -844,21 +844,21 @@ define amdgpu_kernel void @local_nontemporal_store_1(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_nontemporal_store_1:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[0:1], 0x0
-; GFX1250-CU-NEXT: s_wait_xcnt 0x0
-; GFX1250-CU-NEXT: s_mov_b32 s1, 0x3ff
-; GFX1250-CU-NEXT: v_and_b32_e64 v0, v0, s1
-; GFX1250-CU-NEXT: s_mov_b32 s1, 2
-; GFX1250-CU-NEXT: v_lshl_add_u32 v0, v0, s1, s2
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_nontemporal_store_1:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[0:1], 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_mov_b32 s1, 0x3ff
+; GFX1250-NEXT: v_and_b32_e64 v0, v0, s1
+; GFX1250-NEXT: s_mov_b32 s1, 2
+; GFX1250-NEXT: v_lshl_add_u32 v0, v0, s1, s2
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(3) %out) {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -1049,17 +1049,17 @@ define amdgpu_kernel void @local_nontemporal_volatile_load(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_nontemporal_volatile_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ds_load_b32 v1, v1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_nontemporal_volatile_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ds_load_b32 v1, v1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(1) %out) {
entry:
%val = load volatile i32, ptr addrspace(3) %in, align 4, !nontemporal !0
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-singlethread.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-singlethread.ll
index 97c80ece2b053..1800acbbf605b 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-singlethread.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-singlethread.ll
@@ -12,7 +12,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250-CU %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250 %s
define amdgpu_kernel void @local_singlethread_unordered_load(
; GFX6-LABEL: local_singlethread_unordered_load:
@@ -179,17 +179,17 @@ define amdgpu_kernel void @local_singlethread_unordered_load(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_unordered_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: ds_load_b32 v1, v0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_unordered_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: ds_load_b32 v1, v0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
%val = load atomic i32, ptr addrspace(3) %in syncscope("singlethread") unordered, align 4
@@ -362,17 +362,17 @@ define amdgpu_kernel void @local_singlethread_monotonic_load(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_monotonic_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: ds_load_b32 v1, v0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_monotonic_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: ds_load_b32 v1, v0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
%val = load atomic i32, ptr addrspace(3) %in syncscope("singlethread") monotonic, align 4
@@ -545,17 +545,17 @@ define amdgpu_kernel void @local_singlethread_acquire_load(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_acquire_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: ds_load_b32 v1, v0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_acquire_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: ds_load_b32 v1, v0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
%val = load atomic i32, ptr addrspace(3) %in syncscope("singlethread") acquire, align 4
@@ -728,17 +728,17 @@ define amdgpu_kernel void @local_singlethread_seq_cst_load(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_seq_cst_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: ds_load_b32 v1, v0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_seq_cst_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: ds_load_b32 v1, v0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
%val = load atomic i32, ptr addrspace(3) %in syncscope("singlethread") seq_cst, align 4
@@ -881,15 +881,15 @@ define amdgpu_kernel void @local_singlethread_unordered_store(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_unordered_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_unordered_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic i32 %in, ptr addrspace(3) %out syncscope("singlethread") unordered, align 4
@@ -1031,15 +1031,15 @@ define amdgpu_kernel void @local_singlethread_monotonic_store(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_monotonic_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_monotonic_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic i32 %in, ptr addrspace(3) %out syncscope("singlethread") monotonic, align 4
@@ -1181,15 +1181,15 @@ define amdgpu_kernel void @local_singlethread_release_store(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_release_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_release_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic i32 %in, ptr addrspace(3) %out syncscope("singlethread") release, align 4
@@ -1331,15 +1331,15 @@ define amdgpu_kernel void @local_singlethread_seq_cst_store(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_seq_cst_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_seq_cst_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic i32 %in, ptr addrspace(3) %out syncscope("singlethread") seq_cst, align 4
@@ -1481,15 +1481,15 @@ define amdgpu_kernel void @local_singlethread_monotonic_atomicrmw(
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_monotonic_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_monotonic_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("singlethread") monotonic
@@ -1631,15 +1631,15 @@ define amdgpu_kernel void @local_singlethread_acquire_atomicrmw(
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_acquire_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_acquire_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("singlethread") acquire
@@ -1781,15 +1781,15 @@ define amdgpu_kernel void @local_singlethread_release_atomicrmw(
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_release_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_release_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("singlethread") release
@@ -1931,15 +1931,15 @@ define amdgpu_kernel void @local_singlethread_acq_rel_atomicrmw(
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_acq_rel_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_acq_rel_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("singlethread") acq_rel
@@ -2081,15 +2081,15 @@ define amdgpu_kernel void @local_singlethread_seq_cst_atomicrmw(
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_seq_cst_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_seq_cst_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("singlethread") seq_cst
@@ -2273,18 +2273,18 @@ define amdgpu_kernel void @local_singlethread_acquire_ret_atomicrmw(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_acquire_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_acquire_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("singlethread") acquire
@@ -2469,18 +2469,18 @@ define amdgpu_kernel void @local_singlethread_acq_rel_ret_atomicrmw(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_acq_rel_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_acq_rel_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("singlethread") acq_rel
@@ -2665,18 +2665,18 @@ define amdgpu_kernel void @local_singlethread_seq_cst_ret_atomicrmw(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_seq_cst_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_seq_cst_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("singlethread") seq_cst
@@ -2845,17 +2845,17 @@ define amdgpu_kernel void @local_singlethread_monotonic_monotonic_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_monotonic_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_monotonic_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -3024,17 +3024,17 @@ define amdgpu_kernel void @local_singlethread_acquire_monotonic_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_acquire_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_acquire_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -3203,17 +3203,17 @@ define amdgpu_kernel void @local_singlethread_release_monotonic_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_release_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_release_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -3382,17 +3382,17 @@ define amdgpu_kernel void @local_singlethread_acq_rel_monotonic_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_acq_rel_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_acq_rel_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -3561,17 +3561,17 @@ define amdgpu_kernel void @local_singlethread_seq_cst_monotonic_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_seq_cst_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_seq_cst_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -3740,17 +3740,17 @@ define amdgpu_kernel void @local_singlethread_monotonic_acquire_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_monotonic_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_monotonic_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -3919,17 +3919,17 @@ define amdgpu_kernel void @local_singlethread_acquire_acquire_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_acquire_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_acquire_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -4098,17 +4098,17 @@ define amdgpu_kernel void @local_singlethread_release_acquire_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_release_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_release_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -4277,17 +4277,17 @@ define amdgpu_kernel void @local_singlethread_acq_rel_acquire_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_acq_rel_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_acq_rel_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -4456,17 +4456,17 @@ define amdgpu_kernel void @local_singlethread_seq_cst_acquire_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_seq_cst_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_seq_cst_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -4635,17 +4635,17 @@ define amdgpu_kernel void @local_singlethread_monotonic_seq_cst_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_monotonic_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_monotonic_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -4814,17 +4814,17 @@ define amdgpu_kernel void @local_singlethread_acquire_seq_cst_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_acquire_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_acquire_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -4993,17 +4993,17 @@ define amdgpu_kernel void @local_singlethread_release_seq_cst_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_release_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_release_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -5172,17 +5172,17 @@ define amdgpu_kernel void @local_singlethread_acq_rel_seq_cst_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_acq_rel_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_acq_rel_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -5351,17 +5351,17 @@ define amdgpu_kernel void @local_singlethread_seq_cst_seq_cst_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_seq_cst_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_seq_cst_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -5572,20 +5572,20 @@ define amdgpu_kernel void @local_singlethread_monotonic_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_monotonic_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_monotonic_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -5798,20 +5798,20 @@ define amdgpu_kernel void @local_singlethread_acquire_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_acquire_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_acquire_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -6024,20 +6024,20 @@ define amdgpu_kernel void @local_singlethread_release_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_release_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_release_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -6250,20 +6250,20 @@ define amdgpu_kernel void @local_singlethread_acq_rel_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_acq_rel_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_acq_rel_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -6476,20 +6476,20 @@ define amdgpu_kernel void @local_singlethread_seq_cst_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_seq_cst_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_seq_cst_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -6702,20 +6702,20 @@ define amdgpu_kernel void @local_singlethread_monotonic_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_monotonic_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_monotonic_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -6928,20 +6928,20 @@ define amdgpu_kernel void @local_singlethread_acquire_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_acquire_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_acquire_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -7154,20 +7154,20 @@ define amdgpu_kernel void @local_singlethread_release_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_release_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_release_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -7380,20 +7380,20 @@ define amdgpu_kernel void @local_singlethread_acq_rel_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_acq_rel_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_acq_rel_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -7606,20 +7606,20 @@ define amdgpu_kernel void @local_singlethread_seq_cst_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_seq_cst_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_seq_cst_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -7832,20 +7832,20 @@ define amdgpu_kernel void @local_singlethread_monotonic_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_monotonic_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_monotonic_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -8058,20 +8058,20 @@ define amdgpu_kernel void @local_singlethread_acquire_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_acquire_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_acquire_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -8284,20 +8284,20 @@ define amdgpu_kernel void @local_singlethread_release_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_release_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_release_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -8510,20 +8510,20 @@ define amdgpu_kernel void @local_singlethread_acq_rel_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_acq_rel_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_acq_rel_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -8736,20 +8736,20 @@ define amdgpu_kernel void @local_singlethread_seq_cst_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_seq_cst_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_seq_cst_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -8924,17 +8924,17 @@ define amdgpu_kernel void @local_singlethread_one_as_unordered_load(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_one_as_unordered_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: ds_load_b32 v1, v0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_one_as_unordered_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: ds_load_b32 v1, v0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
%val = load atomic i32, ptr addrspace(3) %in syncscope("singlethread-one-as") unordered, align 4
@@ -9107,17 +9107,17 @@ define amdgpu_kernel void @local_singlethread_one_as_monotonic_load(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_one_as_monotonic_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: ds_load_b32 v1, v0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_one_as_monotonic_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: ds_load_b32 v1, v0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
%val = load atomic i32, ptr addrspace(3) %in syncscope("singlethread-one-as") monotonic, align 4
@@ -9290,17 +9290,17 @@ define amdgpu_kernel void @local_singlethread_one_as_acquire_load(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_one_as_acquire_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: ds_load_b32 v1, v0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_one_as_acquire_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: ds_load_b32 v1, v0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
%val = load atomic i32, ptr addrspace(3) %in syncscope("singlethread-one-as") acquire, align 4
@@ -9473,17 +9473,17 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_load(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_one_as_seq_cst_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: ds_load_b32 v1, v0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_one_as_seq_cst_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: ds_load_b32 v1, v0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
%val = load atomic i32, ptr addrspace(3) %in syncscope("singlethread-one-as") seq_cst, align 4
@@ -9626,15 +9626,15 @@ define amdgpu_kernel void @local_singlethread_one_as_unordered_store(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_one_as_unordered_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_one_as_unordered_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic i32 %in, ptr addrspace(3) %out syncscope("singlethread-one-as") unordered, align 4
@@ -9776,15 +9776,15 @@ define amdgpu_kernel void @local_singlethread_one_as_monotonic_store(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_one_as_monotonic_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_one_as_monotonic_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic i32 %in, ptr addrspace(3) %out syncscope("singlethread-one-as") monotonic, align 4
@@ -9926,15 +9926,15 @@ define amdgpu_kernel void @local_singlethread_one_as_release_store(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_one_as_release_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_one_as_release_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic i32 %in, ptr addrspace(3) %out syncscope("singlethread-one-as") release, align 4
@@ -10076,15 +10076,15 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_store(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_one_as_seq_cst_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_one_as_seq_cst_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic i32 %in, ptr addrspace(3) %out syncscope("singlethread-one-as") seq_cst, align 4
@@ -10226,15 +10226,15 @@ define amdgpu_kernel void @local_singlethread_one_as_monotonic_atomicrmw(
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_one_as_monotonic_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_one_as_monotonic_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("singlethread-one-as") monotonic
@@ -10376,15 +10376,15 @@ define amdgpu_kernel void @local_singlethread_one_as_acquire_atomicrmw(
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_one_as_acquire_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_one_as_acquire_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("singlethread-one-as") acquire
@@ -10526,15 +10526,15 @@ define amdgpu_kernel void @local_singlethread_one_as_release_atomicrmw(
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_one_as_release_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_one_as_release_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("singlethread-one-as") release
@@ -10676,15 +10676,15 @@ define amdgpu_kernel void @local_singlethread_one_as_acq_rel_atomicrmw(
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_one_as_acq_rel_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_one_as_acq_rel_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("singlethread-one-as") acq_rel
@@ -10826,15 +10826,15 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_atomicrmw(
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_one_as_seq_cst_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_one_as_seq_cst_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("singlethread-one-as") seq_cst
@@ -11018,18 +11018,18 @@ define amdgpu_kernel void @local_singlethread_one_as_acquire_ret_atomicrmw(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_one_as_acquire_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_one_as_acquire_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("singlethread-one-as") acquire
@@ -11214,18 +11214,18 @@ define amdgpu_kernel void @local_singlethread_one_as_acq_rel_ret_atomicrmw(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_one_as_acq_rel_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_one_as_acq_rel_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("singlethread-one-as") acq_rel
@@ -11410,18 +11410,18 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_ret_atomicrmw(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_one_as_seq_cst_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_one_as_seq_cst_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("singlethread-one-as") seq_cst
@@ -11590,17 +11590,17 @@ define amdgpu_kernel void @local_singlethread_one_as_monotonic_monotonic_cmpxchg
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_one_as_monotonic_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_one_as_monotonic_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -11769,17 +11769,17 @@ define amdgpu_kernel void @local_singlethread_one_as_acquire_monotonic_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_one_as_acquire_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_one_as_acquire_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -11948,17 +11948,17 @@ define amdgpu_kernel void @local_singlethread_one_as_release_monotonic_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_one_as_release_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_one_as_release_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -12127,17 +12127,17 @@ define amdgpu_kernel void @local_singlethread_one_as_acq_rel_monotonic_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_one_as_acq_rel_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_one_as_acq_rel_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -12306,17 +12306,17 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_monotonic_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_one_as_seq_cst_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_one_as_seq_cst_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -12485,17 +12485,17 @@ define amdgpu_kernel void @local_singlethread_one_as_monotonic_acquire_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_one_as_monotonic_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_one_as_monotonic_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -12664,17 +12664,17 @@ define amdgpu_kernel void @local_singlethread_one_as_acquire_acquire_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_one_as_acquire_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_one_as_acquire_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -12843,17 +12843,17 @@ define amdgpu_kernel void @local_singlethread_one_as_release_acquire_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_one_as_release_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_one_as_release_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -13022,17 +13022,17 @@ define amdgpu_kernel void @local_singlethread_one_as_acq_rel_acquire_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_one_as_acq_rel_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_one_as_acq_rel_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -13201,17 +13201,17 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_acquire_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_one_as_seq_cst_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_one_as_seq_cst_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -13380,17 +13380,17 @@ define amdgpu_kernel void @local_singlethread_one_as_monotonic_seq_cst_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_one_as_monotonic_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_one_as_monotonic_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -13559,17 +13559,17 @@ define amdgpu_kernel void @local_singlethread_one_as_acquire_seq_cst_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_one_as_acquire_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_one_as_acquire_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -13738,17 +13738,17 @@ define amdgpu_kernel void @local_singlethread_one_as_release_seq_cst_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_one_as_release_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_one_as_release_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -13917,17 +13917,17 @@ define amdgpu_kernel void @local_singlethread_one_as_acq_rel_seq_cst_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_one_as_acq_rel_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_one_as_acq_rel_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -14096,17 +14096,17 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_seq_cst_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_one_as_seq_cst_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_one_as_seq_cst_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -14317,20 +14317,20 @@ define amdgpu_kernel void @local_singlethread_one_as_monotonic_monotonic_ret_cmp
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_one_as_monotonic_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_one_as_monotonic_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -14543,20 +14543,20 @@ define amdgpu_kernel void @local_singlethread_one_as_acquire_monotonic_ret_cmpxc
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_one_as_acquire_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_one_as_acquire_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -14769,20 +14769,20 @@ define amdgpu_kernel void @local_singlethread_one_as_release_monotonic_ret_cmpxc
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_one_as_release_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_one_as_release_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -14995,20 +14995,20 @@ define amdgpu_kernel void @local_singlethread_one_as_acq_rel_monotonic_ret_cmpxc
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_one_as_acq_rel_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_one_as_acq_rel_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -15221,20 +15221,20 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_monotonic_ret_cmpxc
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_one_as_seq_cst_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_one_as_seq_cst_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -15447,20 +15447,20 @@ define amdgpu_kernel void @local_singlethread_one_as_monotonic_acquire_ret_cmpxc
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_one_as_monotonic_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_one_as_monotonic_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -15673,20 +15673,20 @@ define amdgpu_kernel void @local_singlethread_one_as_acquire_acquire_ret_cmpxchg
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_one_as_acquire_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_one_as_acquire_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -15899,20 +15899,20 @@ define amdgpu_kernel void @local_singlethread_one_as_release_acquire_ret_cmpxchg
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_one_as_release_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_one_as_release_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -16125,20 +16125,20 @@ define amdgpu_kernel void @local_singlethread_one_as_acq_rel_acquire_ret_cmpxchg
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_one_as_acq_rel_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_one_as_acq_rel_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -16351,20 +16351,20 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_acquire_ret_cmpxchg
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_one_as_seq_cst_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_one_as_seq_cst_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -16577,20 +16577,20 @@ define amdgpu_kernel void @local_singlethread_one_as_monotonic_seq_cst_ret_cmpxc
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_one_as_monotonic_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_one_as_monotonic_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -16803,20 +16803,20 @@ define amdgpu_kernel void @local_singlethread_one_as_acquire_seq_cst_ret_cmpxchg
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_one_as_acquire_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_one_as_acquire_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -17029,20 +17029,20 @@ define amdgpu_kernel void @local_singlethread_one_as_release_seq_cst_ret_cmpxchg
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_one_as_release_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_one_as_release_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -17255,20 +17255,20 @@ define amdgpu_kernel void @local_singlethread_one_as_acq_rel_seq_cst_ret_cmpxchg
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_one_as_acq_rel_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_one_as_acq_rel_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -17481,20 +17481,20 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_seq_cst_ret_cmpxchg
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_singlethread_one_as_seq_cst_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_singlethread_one_as_seq_cst_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-system.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-system.ll
index fdf69a5998652..1356fe4854170 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-system.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-system.ll
@@ -12,7 +12,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250-CU %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250 %s
define amdgpu_kernel void @local_system_unordered_load(
; GFX6-LABEL: local_system_unordered_load:
@@ -179,17 +179,17 @@ define amdgpu_kernel void @local_system_unordered_load(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_unordered_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: ds_load_b32 v1, v0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_unordered_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: ds_load_b32 v1, v0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
%val = load atomic i32, ptr addrspace(3) %in unordered, align 4
@@ -362,17 +362,17 @@ define amdgpu_kernel void @local_system_monotonic_load(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_monotonic_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: ds_load_b32 v1, v0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_monotonic_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: ds_load_b32 v1, v0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
%val = load atomic i32, ptr addrspace(3) %in monotonic, align 4
@@ -550,17 +550,17 @@ define amdgpu_kernel void @local_system_acquire_load(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_acquire_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: ds_load_b32 v1, v0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_acquire_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: ds_load_b32 v1, v0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
%val = load atomic i32, ptr addrspace(3) %in acquire, align 4
@@ -756,18 +756,18 @@ define amdgpu_kernel void @local_system_seq_cst_load(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_seq_cst_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_load_b32 v1, v0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_seq_cst_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_load_b32 v1, v0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
%val = load atomic i32, ptr addrspace(3) %in seq_cst, align 4
@@ -910,15 +910,15 @@ define amdgpu_kernel void @local_system_unordered_store(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_unordered_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_unordered_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic i32 %in, ptr addrspace(3) %out unordered, align 4
@@ -1060,15 +1060,15 @@ define amdgpu_kernel void @local_system_monotonic_store(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_monotonic_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_monotonic_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic i32 %in, ptr addrspace(3) %out monotonic, align 4
@@ -1228,16 +1228,16 @@ define amdgpu_kernel void @local_system_release_store(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_release_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_release_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic i32 %in, ptr addrspace(3) %out release, align 4
@@ -1397,16 +1397,16 @@ define amdgpu_kernel void @local_system_seq_cst_store(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_seq_cst_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_seq_cst_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic i32 %in, ptr addrspace(3) %out seq_cst, align 4
@@ -1548,15 +1548,15 @@ define amdgpu_kernel void @local_system_monotonic_atomicrmw(
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_monotonic_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_monotonic_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in monotonic
@@ -1714,16 +1714,16 @@ define amdgpu_kernel void @local_system_acquire_atomicrmw(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_acquire_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_acquire_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in acquire
@@ -1883,16 +1883,16 @@ define amdgpu_kernel void @local_system_release_atomicrmw(
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_release_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_release_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in release
@@ -2068,17 +2068,17 @@ define amdgpu_kernel void @local_system_acq_rel_atomicrmw(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_acq_rel_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_acq_rel_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in acq_rel
@@ -2254,17 +2254,17 @@ define amdgpu_kernel void @local_system_seq_cst_atomicrmw(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_seq_cst_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_seq_cst_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in seq_cst
@@ -2453,18 +2453,18 @@ define amdgpu_kernel void @local_system_acquire_ret_atomicrmw(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_acquire_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_acquire_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in acquire
@@ -2672,19 +2672,19 @@ define amdgpu_kernel void @local_system_acq_rel_ret_atomicrmw(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_acq_rel_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_acq_rel_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in acq_rel
@@ -2892,19 +2892,19 @@ define amdgpu_kernel void @local_system_seq_cst_ret_atomicrmw(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_seq_cst_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_seq_cst_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in seq_cst
@@ -3073,17 +3073,17 @@ define amdgpu_kernel void @local_system_monotonic_monotonic_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_monotonic_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_monotonic_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -3268,18 +3268,18 @@ define amdgpu_kernel void @local_system_acquire_monotonic_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_acquire_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_acquire_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -3466,18 +3466,18 @@ define amdgpu_kernel void @local_system_release_monotonic_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_release_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_release_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -3680,19 +3680,19 @@ define amdgpu_kernel void @local_system_acq_rel_monotonic_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_acq_rel_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_acq_rel_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -3895,19 +3895,19 @@ define amdgpu_kernel void @local_system_seq_cst_monotonic_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_seq_cst_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_seq_cst_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -4092,18 +4092,18 @@ define amdgpu_kernel void @local_system_monotonic_acquire_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_monotonic_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_monotonic_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -4288,18 +4288,18 @@ define amdgpu_kernel void @local_system_acquire_acquire_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_acquire_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_acquire_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -4502,19 +4502,19 @@ define amdgpu_kernel void @local_system_release_acquire_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_release_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_release_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -4717,19 +4717,19 @@ define amdgpu_kernel void @local_system_acq_rel_acquire_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_acq_rel_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_acq_rel_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -4932,19 +4932,19 @@ define amdgpu_kernel void @local_system_seq_cst_acquire_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_seq_cst_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_seq_cst_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -5147,19 +5147,19 @@ define amdgpu_kernel void @local_system_monotonic_seq_cst_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_monotonic_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_monotonic_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -5362,19 +5362,19 @@ define amdgpu_kernel void @local_system_acquire_seq_cst_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_acquire_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_acquire_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -5577,19 +5577,19 @@ define amdgpu_kernel void @local_system_release_seq_cst_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_release_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_release_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -5792,19 +5792,19 @@ define amdgpu_kernel void @local_system_acq_rel_seq_cst_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_acq_rel_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_acq_rel_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -6007,19 +6007,19 @@ define amdgpu_kernel void @local_system_seq_cst_seq_cst_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_seq_cst_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_seq_cst_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -6230,20 +6230,20 @@ define amdgpu_kernel void @local_system_monotonic_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_monotonic_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_monotonic_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -6461,20 +6461,20 @@ define amdgpu_kernel void @local_system_acquire_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_acquire_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_acquire_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -6705,21 +6705,21 @@ define amdgpu_kernel void @local_system_release_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_release_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_release_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -6955,21 +6955,21 @@ define amdgpu_kernel void @local_system_acq_rel_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_acq_rel_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_acq_rel_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -7205,21 +7205,21 @@ define amdgpu_kernel void @local_system_seq_cst_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_seq_cst_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_seq_cst_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -7437,20 +7437,20 @@ define amdgpu_kernel void @local_system_monotonic_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_monotonic_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_monotonic_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -7668,20 +7668,20 @@ define amdgpu_kernel void @local_system_acquire_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_acquire_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_acquire_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -7917,21 +7917,21 @@ define amdgpu_kernel void @local_system_release_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_release_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_release_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -8167,21 +8167,21 @@ define amdgpu_kernel void @local_system_acq_rel_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_acq_rel_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_acq_rel_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -8417,21 +8417,21 @@ define amdgpu_kernel void @local_system_seq_cst_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_seq_cst_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_seq_cst_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -8667,21 +8667,21 @@ define amdgpu_kernel void @local_system_monotonic_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_monotonic_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_monotonic_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -8917,21 +8917,21 @@ define amdgpu_kernel void @local_system_acquire_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_acquire_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_acquire_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -9167,21 +9167,21 @@ define amdgpu_kernel void @local_system_release_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_release_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_release_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -9417,21 +9417,21 @@ define amdgpu_kernel void @local_system_acq_rel_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_acq_rel_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_acq_rel_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -9667,21 +9667,21 @@ define amdgpu_kernel void @local_system_seq_cst_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_seq_cst_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_seq_cst_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -9856,17 +9856,17 @@ define amdgpu_kernel void @local_system_one_as_unordered_load(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_one_as_unordered_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: ds_load_b32 v1, v0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_one_as_unordered_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: ds_load_b32 v1, v0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
%val = load atomic i32, ptr addrspace(3) %in syncscope("one-as") unordered, align 4
@@ -10039,17 +10039,17 @@ define amdgpu_kernel void @local_system_one_as_monotonic_load(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_one_as_monotonic_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: ds_load_b32 v1, v0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_one_as_monotonic_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: ds_load_b32 v1, v0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
%val = load atomic i32, ptr addrspace(3) %in syncscope("one-as") monotonic, align 4
@@ -10222,17 +10222,17 @@ define amdgpu_kernel void @local_system_one_as_acquire_load(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_one_as_acquire_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: ds_load_b32 v1, v0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_one_as_acquire_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: ds_load_b32 v1, v0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
%val = load atomic i32, ptr addrspace(3) %in syncscope("one-as") acquire, align 4
@@ -10405,17 +10405,17 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_load(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_one_as_seq_cst_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: ds_load_b32 v1, v0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_one_as_seq_cst_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: ds_load_b32 v1, v0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
%val = load atomic i32, ptr addrspace(3) %in syncscope("one-as") seq_cst, align 4
@@ -10558,15 +10558,15 @@ define amdgpu_kernel void @local_system_one_as_unordered_store(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_one_as_unordered_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_one_as_unordered_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic i32 %in, ptr addrspace(3) %out syncscope("one-as") unordered, align 4
@@ -10708,15 +10708,15 @@ define amdgpu_kernel void @local_system_one_as_monotonic_store(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_one_as_monotonic_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_one_as_monotonic_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic i32 %in, ptr addrspace(3) %out syncscope("one-as") monotonic, align 4
@@ -10858,15 +10858,15 @@ define amdgpu_kernel void @local_system_one_as_release_store(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_one_as_release_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_one_as_release_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic i32 %in, ptr addrspace(3) %out syncscope("one-as") release, align 4
@@ -11008,15 +11008,15 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_store(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_one_as_seq_cst_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_one_as_seq_cst_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic i32 %in, ptr addrspace(3) %out syncscope("one-as") seq_cst, align 4
@@ -11158,15 +11158,15 @@ define amdgpu_kernel void @local_system_one_as_monotonic_atomicrmw(
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_one_as_monotonic_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_one_as_monotonic_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("one-as") monotonic
@@ -11308,15 +11308,15 @@ define amdgpu_kernel void @local_system_one_as_acquire_atomicrmw(
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_one_as_acquire_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_one_as_acquire_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("one-as") acquire
@@ -11458,15 +11458,15 @@ define amdgpu_kernel void @local_system_one_as_release_atomicrmw(
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_one_as_release_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_one_as_release_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("one-as") release
@@ -11608,15 +11608,15 @@ define amdgpu_kernel void @local_system_one_as_acq_rel_atomicrmw(
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_one_as_acq_rel_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_one_as_acq_rel_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("one-as") acq_rel
@@ -11758,15 +11758,15 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_atomicrmw(
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_one_as_seq_cst_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_one_as_seq_cst_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("one-as") seq_cst
@@ -11950,18 +11950,18 @@ define amdgpu_kernel void @local_system_one_as_acquire_ret_atomicrmw(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_one_as_acquire_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_one_as_acquire_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("one-as") acquire
@@ -12146,18 +12146,18 @@ define amdgpu_kernel void @local_system_one_as_acq_rel_ret_atomicrmw(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_one_as_acq_rel_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_one_as_acq_rel_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("one-as") acq_rel
@@ -12342,18 +12342,18 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_ret_atomicrmw(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_one_as_seq_cst_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_one_as_seq_cst_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("one-as") seq_cst
@@ -12522,17 +12522,17 @@ define amdgpu_kernel void @local_system_one_as_monotonic_monotonic_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_one_as_monotonic_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_one_as_monotonic_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -12701,17 +12701,17 @@ define amdgpu_kernel void @local_system_one_as_acquire_monotonic_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_one_as_acquire_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_one_as_acquire_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -12880,17 +12880,17 @@ define amdgpu_kernel void @local_system_one_as_release_monotonic_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_one_as_release_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_one_as_release_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -13059,17 +13059,17 @@ define amdgpu_kernel void @local_system_one_as_acq_rel_monotonic_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_one_as_acq_rel_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_one_as_acq_rel_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -13238,17 +13238,17 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_monotonic_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_one_as_seq_cst_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_one_as_seq_cst_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -13417,17 +13417,17 @@ define amdgpu_kernel void @local_system_one_as_monotonic_acquire_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_one_as_monotonic_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_one_as_monotonic_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -13596,17 +13596,17 @@ define amdgpu_kernel void @local_system_one_as_acquire_acquire_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_one_as_acquire_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_one_as_acquire_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -13775,17 +13775,17 @@ define amdgpu_kernel void @local_system_one_as_release_acquire_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_one_as_release_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_one_as_release_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -13954,17 +13954,17 @@ define amdgpu_kernel void @local_system_one_as_acq_rel_acquire_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_one_as_acq_rel_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_one_as_acq_rel_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -14133,17 +14133,17 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_acquire_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_one_as_seq_cst_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_one_as_seq_cst_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -14312,17 +14312,17 @@ define amdgpu_kernel void @local_system_one_as_monotonic_seq_cst_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_one_as_monotonic_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_one_as_monotonic_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -14491,17 +14491,17 @@ define amdgpu_kernel void @local_system_one_as_acquire_seq_cst_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_one_as_acquire_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_one_as_acquire_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -14670,17 +14670,17 @@ define amdgpu_kernel void @local_system_one_as_release_seq_cst_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_one_as_release_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_one_as_release_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -14849,17 +14849,17 @@ define amdgpu_kernel void @local_system_one_as_acq_rel_seq_cst_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_one_as_acq_rel_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_one_as_acq_rel_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -15028,17 +15028,17 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_seq_cst_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_one_as_seq_cst_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_one_as_seq_cst_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -15249,20 +15249,20 @@ define amdgpu_kernel void @local_system_one_as_monotonic_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_one_as_monotonic_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_one_as_monotonic_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -15475,20 +15475,20 @@ define amdgpu_kernel void @local_system_one_as_acquire_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_one_as_acquire_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_one_as_acquire_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -15701,20 +15701,20 @@ define amdgpu_kernel void @local_system_one_as_release_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_one_as_release_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_one_as_release_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -15927,20 +15927,20 @@ define amdgpu_kernel void @local_system_one_as_acq_rel_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_one_as_acq_rel_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_one_as_acq_rel_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -16153,20 +16153,20 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_one_as_seq_cst_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_one_as_seq_cst_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -16379,20 +16379,20 @@ define amdgpu_kernel void @local_system_one_as_monotonic_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_one_as_monotonic_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_one_as_monotonic_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -16605,20 +16605,20 @@ define amdgpu_kernel void @local_system_one_as_acquire_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_one_as_acquire_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_one_as_acquire_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -16831,20 +16831,20 @@ define amdgpu_kernel void @local_system_one_as_release_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_one_as_release_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_one_as_release_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -17057,20 +17057,20 @@ define amdgpu_kernel void @local_system_one_as_acq_rel_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_one_as_acq_rel_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_one_as_acq_rel_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -17283,20 +17283,20 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_one_as_seq_cst_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_one_as_seq_cst_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -17509,20 +17509,20 @@ define amdgpu_kernel void @local_system_one_as_monotonic_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_one_as_monotonic_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_one_as_monotonic_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -17735,20 +17735,20 @@ define amdgpu_kernel void @local_system_one_as_acquire_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_one_as_acquire_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_one_as_acquire_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -17961,20 +17961,20 @@ define amdgpu_kernel void @local_system_one_as_release_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_one_as_release_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_one_as_release_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -18187,20 +18187,20 @@ define amdgpu_kernel void @local_system_one_as_acq_rel_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_one_as_acq_rel_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_one_as_acq_rel_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -18413,20 +18413,20 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_system_one_as_seq_cst_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_system_one_as_seq_cst_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-volatile.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-volatile.ll
index 88cba0bddf5d7..75e28f9008e28 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-volatile.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-volatile.ll
@@ -8,7 +8,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250-CU %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250 %s
define amdgpu_kernel void @local_volatile_load_0(
; GFX6-LABEL: local_volatile_load_0:
@@ -143,17 +143,17 @@ define amdgpu_kernel void @local_volatile_load_0(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_volatile_load_0:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: ds_load_b32 v1, v1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_volatile_load_0:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: ds_load_b32 v1, v1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(1) %out) {
entry:
%val = load volatile i32, ptr addrspace(3) %in, align 4
@@ -322,21 +322,21 @@ define amdgpu_kernel void @local_volatile_load_1(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_volatile_load_1:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, v0
-; GFX1250-CU-NEXT: s_load_b32 s3, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-CU-NEXT: s_mov_b32 s2, 0x3ff
-; GFX1250-CU-NEXT: v_and_b32_e64 v1, v1, s2
-; GFX1250-CU-NEXT: s_mov_b32 s2, 2
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_lshl_add_u32 v1, v1, s2, s3
-; GFX1250-CU-NEXT: ds_load_b32 v1, v1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_volatile_load_1:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: v_mov_b32_e32 v1, v0
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_mov_b32 s2, 0x3ff
+; GFX1250-NEXT: v_and_b32_e64 v1, v1, s2
+; GFX1250-NEXT: s_mov_b32 s2, 2
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_lshl_add_u32 v1, v1, s2, s3
+; GFX1250-NEXT: ds_load_b32 v1, v1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(1) %out) {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -459,17 +459,17 @@ define amdgpu_kernel void @local_volatile_store_0(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_volatile_store_0:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[2:3], 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_volatile_store_0:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[2:3], 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(3) %out) {
entry:
%val = load i32, ptr addrspace(1) %in, align 4
@@ -612,21 +612,21 @@ define amdgpu_kernel void @local_volatile_store_1(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_volatile_store_1:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[0:1], 0x0
-; GFX1250-CU-NEXT: s_wait_xcnt 0x0
-; GFX1250-CU-NEXT: s_mov_b32 s1, 0x3ff
-; GFX1250-CU-NEXT: v_and_b32_e64 v0, v0, s1
-; GFX1250-CU-NEXT: s_mov_b32 s1, 2
-; GFX1250-CU-NEXT: v_lshl_add_u32 v0, v0, s1, s2
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_volatile_store_1:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[0:1], 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_mov_b32 s1, 0x3ff
+; GFX1250-NEXT: v_and_b32_e64 v0, v0, s1
+; GFX1250-NEXT: s_mov_b32 s1, 2
+; GFX1250-NEXT: v_lshl_add_u32 v0, v0, s1, s2
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(3) %out) {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -756,17 +756,17 @@ define amdgpu_kernel void @local_volatile_workgroup_acquire_load(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_volatile_workgroup_acquire_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: ds_load_b32 v1, v0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_volatile_workgroup_acquire_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: ds_load_b32 v1, v0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
%val = load atomic volatile i32, ptr addrspace(3) %in syncscope("workgroup") acquire, align 4
@@ -883,16 +883,16 @@ define amdgpu_kernel void @local_volatile_workgroup_release_store(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_volatile_workgroup_release_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_volatile_workgroup_release_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic volatile i32 %in, ptr addrspace(3) %out syncscope("workgroup") release, align 4
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-wavefront.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-wavefront.ll
index b8ad75049aff8..7e345ed6e2716 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-wavefront.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-wavefront.ll
@@ -12,7 +12,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250-CU %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250 %s
define amdgpu_kernel void @local_wavefront_unordered_load(
; GFX6-LABEL: local_wavefront_unordered_load:
@@ -179,17 +179,17 @@ define amdgpu_kernel void @local_wavefront_unordered_load(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_unordered_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: ds_load_b32 v1, v0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_unordered_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: ds_load_b32 v1, v0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
%val = load atomic i32, ptr addrspace(3) %in syncscope("wavefront") unordered, align 4
@@ -362,17 +362,17 @@ define amdgpu_kernel void @local_wavefront_monotonic_load(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_monotonic_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: ds_load_b32 v1, v0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_monotonic_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: ds_load_b32 v1, v0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
%val = load atomic i32, ptr addrspace(3) %in syncscope("wavefront") monotonic, align 4
@@ -545,17 +545,17 @@ define amdgpu_kernel void @local_wavefront_acquire_load(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_acquire_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: ds_load_b32 v1, v0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_acquire_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: ds_load_b32 v1, v0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
%val = load atomic i32, ptr addrspace(3) %in syncscope("wavefront") acquire, align 4
@@ -728,17 +728,17 @@ define amdgpu_kernel void @local_wavefront_seq_cst_load(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_seq_cst_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: ds_load_b32 v1, v0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_seq_cst_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: ds_load_b32 v1, v0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
%val = load atomic i32, ptr addrspace(3) %in syncscope("wavefront") seq_cst, align 4
@@ -881,15 +881,15 @@ define amdgpu_kernel void @local_wavefront_unordered_store(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_unordered_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_unordered_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic i32 %in, ptr addrspace(3) %out syncscope("wavefront") unordered, align 4
@@ -1031,15 +1031,15 @@ define amdgpu_kernel void @local_wavefront_monotonic_store(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_monotonic_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_monotonic_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic i32 %in, ptr addrspace(3) %out syncscope("wavefront") monotonic, align 4
@@ -1181,15 +1181,15 @@ define amdgpu_kernel void @local_wavefront_release_store(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_release_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_release_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic i32 %in, ptr addrspace(3) %out syncscope("wavefront") release, align 4
@@ -1331,15 +1331,15 @@ define amdgpu_kernel void @local_wavefront_seq_cst_store(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_seq_cst_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_seq_cst_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic i32 %in, ptr addrspace(3) %out syncscope("wavefront") seq_cst, align 4
@@ -1481,15 +1481,15 @@ define amdgpu_kernel void @local_wavefront_monotonic_atomicrmw(
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_monotonic_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_monotonic_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("wavefront") monotonic
@@ -1631,15 +1631,15 @@ define amdgpu_kernel void @local_wavefront_acquire_atomicrmw(
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_acquire_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_acquire_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("wavefront") acquire
@@ -1781,15 +1781,15 @@ define amdgpu_kernel void @local_wavefront_release_atomicrmw(
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_release_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_release_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("wavefront") release
@@ -1931,15 +1931,15 @@ define amdgpu_kernel void @local_wavefront_acq_rel_atomicrmw(
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_acq_rel_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_acq_rel_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("wavefront") acq_rel
@@ -2081,15 +2081,15 @@ define amdgpu_kernel void @local_wavefront_seq_cst_atomicrmw(
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_seq_cst_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_seq_cst_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("wavefront") seq_cst
@@ -2273,18 +2273,18 @@ define amdgpu_kernel void @local_wavefront_acquire_ret_atomicrmw(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_acquire_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_acquire_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("wavefront") acquire
@@ -2469,18 +2469,18 @@ define amdgpu_kernel void @local_wavefront_acq_rel_ret_atomicrmw(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_acq_rel_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_acq_rel_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("wavefront") acq_rel
@@ -2665,18 +2665,18 @@ define amdgpu_kernel void @local_wavefront_seq_cst_ret_atomicrmw(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_seq_cst_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_seq_cst_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("wavefront") seq_cst
@@ -2845,17 +2845,17 @@ define amdgpu_kernel void @local_wavefront_monotonic_monotonic_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_monotonic_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_monotonic_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -3024,17 +3024,17 @@ define amdgpu_kernel void @local_wavefront_acquire_monotonic_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_acquire_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_acquire_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -3203,17 +3203,17 @@ define amdgpu_kernel void @local_wavefront_release_monotonic_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_release_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_release_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -3382,17 +3382,17 @@ define amdgpu_kernel void @local_wavefront_acq_rel_monotonic_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_acq_rel_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_acq_rel_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -3561,17 +3561,17 @@ define amdgpu_kernel void @local_wavefront_seq_cst_monotonic_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_seq_cst_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_seq_cst_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -3740,17 +3740,17 @@ define amdgpu_kernel void @local_wavefront_monotonic_acquire_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_monotonic_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_monotonic_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -3919,17 +3919,17 @@ define amdgpu_kernel void @local_wavefront_acquire_acquire_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_acquire_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_acquire_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -4098,17 +4098,17 @@ define amdgpu_kernel void @local_wavefront_release_acquire_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_release_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_release_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -4277,17 +4277,17 @@ define amdgpu_kernel void @local_wavefront_acq_rel_acquire_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_acq_rel_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_acq_rel_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -4456,17 +4456,17 @@ define amdgpu_kernel void @local_wavefront_seq_cst_acquire_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_seq_cst_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_seq_cst_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -4635,17 +4635,17 @@ define amdgpu_kernel void @local_wavefront_monotonic_seq_cst_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_monotonic_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_monotonic_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -4814,17 +4814,17 @@ define amdgpu_kernel void @local_wavefront_acquire_seq_cst_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_acquire_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_acquire_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -4993,17 +4993,17 @@ define amdgpu_kernel void @local_wavefront_release_seq_cst_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_release_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_release_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -5172,17 +5172,17 @@ define amdgpu_kernel void @local_wavefront_acq_rel_seq_cst_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_acq_rel_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_acq_rel_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -5351,17 +5351,17 @@ define amdgpu_kernel void @local_wavefront_seq_cst_seq_cst_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_seq_cst_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_seq_cst_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -5572,20 +5572,20 @@ define amdgpu_kernel void @local_wavefront_monotonic_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_monotonic_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_monotonic_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -5798,20 +5798,20 @@ define amdgpu_kernel void @local_wavefront_acquire_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_acquire_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_acquire_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -6024,20 +6024,20 @@ define amdgpu_kernel void @local_wavefront_release_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_release_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_release_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -6250,20 +6250,20 @@ define amdgpu_kernel void @local_wavefront_acq_rel_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_acq_rel_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_acq_rel_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -6476,20 +6476,20 @@ define amdgpu_kernel void @local_wavefront_seq_cst_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_seq_cst_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_seq_cst_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -6702,20 +6702,20 @@ define amdgpu_kernel void @local_wavefront_monotonic_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_monotonic_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_monotonic_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -6928,20 +6928,20 @@ define amdgpu_kernel void @local_wavefront_acquire_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_acquire_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_acquire_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -7154,20 +7154,20 @@ define amdgpu_kernel void @local_wavefront_release_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_release_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_release_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -7380,20 +7380,20 @@ define amdgpu_kernel void @local_wavefront_acq_rel_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_acq_rel_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_acq_rel_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -7606,20 +7606,20 @@ define amdgpu_kernel void @local_wavefront_seq_cst_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_seq_cst_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_seq_cst_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -7832,20 +7832,20 @@ define amdgpu_kernel void @local_wavefront_monotonic_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_monotonic_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_monotonic_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -8058,20 +8058,20 @@ define amdgpu_kernel void @local_wavefront_acquire_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_acquire_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_acquire_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -8284,20 +8284,20 @@ define amdgpu_kernel void @local_wavefront_release_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_release_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_release_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -8510,20 +8510,20 @@ define amdgpu_kernel void @local_wavefront_acq_rel_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_acq_rel_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_acq_rel_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -8736,20 +8736,20 @@ define amdgpu_kernel void @local_wavefront_seq_cst_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_seq_cst_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_seq_cst_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -8924,17 +8924,17 @@ define amdgpu_kernel void @local_wavefront_one_as_unordered_load(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_one_as_unordered_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: ds_load_b32 v1, v0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_one_as_unordered_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: ds_load_b32 v1, v0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
%val = load atomic i32, ptr addrspace(3) %in syncscope("wavefront-one-as") unordered, align 4
@@ -9107,17 +9107,17 @@ define amdgpu_kernel void @local_wavefront_one_as_monotonic_load(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_one_as_monotonic_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: ds_load_b32 v1, v0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_one_as_monotonic_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: ds_load_b32 v1, v0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
%val = load atomic i32, ptr addrspace(3) %in syncscope("wavefront-one-as") monotonic, align 4
@@ -9290,17 +9290,17 @@ define amdgpu_kernel void @local_wavefront_one_as_acquire_load(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_one_as_acquire_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: ds_load_b32 v1, v0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_one_as_acquire_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: ds_load_b32 v1, v0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
%val = load atomic i32, ptr addrspace(3) %in syncscope("wavefront-one-as") acquire, align 4
@@ -9473,17 +9473,17 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_load(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_one_as_seq_cst_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: ds_load_b32 v1, v0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_one_as_seq_cst_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: ds_load_b32 v1, v0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
%val = load atomic i32, ptr addrspace(3) %in syncscope("wavefront-one-as") seq_cst, align 4
@@ -9626,15 +9626,15 @@ define amdgpu_kernel void @local_wavefront_one_as_unordered_store(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_one_as_unordered_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_one_as_unordered_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic i32 %in, ptr addrspace(3) %out syncscope("wavefront-one-as") unordered, align 4
@@ -9776,15 +9776,15 @@ define amdgpu_kernel void @local_wavefront_one_as_monotonic_store(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_one_as_monotonic_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_one_as_monotonic_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic i32 %in, ptr addrspace(3) %out syncscope("wavefront-one-as") monotonic, align 4
@@ -9926,15 +9926,15 @@ define amdgpu_kernel void @local_wavefront_one_as_release_store(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_one_as_release_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_one_as_release_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic i32 %in, ptr addrspace(3) %out syncscope("wavefront-one-as") release, align 4
@@ -10076,15 +10076,15 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_store(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_one_as_seq_cst_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_one_as_seq_cst_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic i32 %in, ptr addrspace(3) %out syncscope("wavefront-one-as") seq_cst, align 4
@@ -10226,15 +10226,15 @@ define amdgpu_kernel void @local_wavefront_one_as_monotonic_atomicrmw(
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_one_as_monotonic_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_one_as_monotonic_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("wavefront-one-as") monotonic
@@ -10376,15 +10376,15 @@ define amdgpu_kernel void @local_wavefront_one_as_acquire_atomicrmw(
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_one_as_acquire_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_one_as_acquire_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("wavefront-one-as") acquire
@@ -10526,15 +10526,15 @@ define amdgpu_kernel void @local_wavefront_one_as_release_atomicrmw(
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_one_as_release_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_one_as_release_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("wavefront-one-as") release
@@ -10676,15 +10676,15 @@ define amdgpu_kernel void @local_wavefront_one_as_acq_rel_atomicrmw(
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_one_as_acq_rel_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_one_as_acq_rel_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("wavefront-one-as") acq_rel
@@ -10826,15 +10826,15 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_atomicrmw(
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_one_as_seq_cst_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_one_as_seq_cst_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("wavefront-one-as") seq_cst
@@ -11018,18 +11018,18 @@ define amdgpu_kernel void @local_wavefront_one_as_acquire_ret_atomicrmw(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_one_as_acquire_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_one_as_acquire_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("wavefront-one-as") acquire
@@ -11214,18 +11214,18 @@ define amdgpu_kernel void @local_wavefront_one_as_acq_rel_ret_atomicrmw(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_one_as_acq_rel_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_one_as_acq_rel_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("wavefront-one-as") acq_rel
@@ -11410,18 +11410,18 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_ret_atomicrmw(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_one_as_seq_cst_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_one_as_seq_cst_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("wavefront-one-as") seq_cst
@@ -11590,17 +11590,17 @@ define amdgpu_kernel void @local_wavefront_one_as_monotonic_monotonic_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_one_as_monotonic_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_one_as_monotonic_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -11769,17 +11769,17 @@ define amdgpu_kernel void @local_wavefront_one_as_acquire_monotonic_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_one_as_acquire_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_one_as_acquire_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -11948,17 +11948,17 @@ define amdgpu_kernel void @local_wavefront_one_as_release_monotonic_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_one_as_release_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_one_as_release_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -12127,17 +12127,17 @@ define amdgpu_kernel void @local_wavefront_one_as_acq_rel_monotonic_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_one_as_acq_rel_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_one_as_acq_rel_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -12306,17 +12306,17 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_monotonic_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_one_as_seq_cst_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_one_as_seq_cst_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -12485,17 +12485,17 @@ define amdgpu_kernel void @local_wavefront_one_as_monotonic_acquire_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_one_as_monotonic_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_one_as_monotonic_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -12664,17 +12664,17 @@ define amdgpu_kernel void @local_wavefront_one_as_acquire_acquire_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_one_as_acquire_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_one_as_acquire_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -12843,17 +12843,17 @@ define amdgpu_kernel void @local_wavefront_one_as_release_acquire_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_one_as_release_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_one_as_release_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -13022,17 +13022,17 @@ define amdgpu_kernel void @local_wavefront_one_as_acq_rel_acquire_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_one_as_acq_rel_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_one_as_acq_rel_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -13201,17 +13201,17 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_acquire_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_one_as_seq_cst_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_one_as_seq_cst_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -13380,17 +13380,17 @@ define amdgpu_kernel void @local_wavefront_one_as_monotonic_seq_cst_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_one_as_monotonic_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_one_as_monotonic_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -13559,17 +13559,17 @@ define amdgpu_kernel void @local_wavefront_one_as_acquire_seq_cst_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_one_as_acquire_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_one_as_acquire_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -13738,17 +13738,17 @@ define amdgpu_kernel void @local_wavefront_one_as_release_seq_cst_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_one_as_release_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_one_as_release_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -13917,17 +13917,17 @@ define amdgpu_kernel void @local_wavefront_one_as_acq_rel_seq_cst_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_one_as_acq_rel_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_one_as_acq_rel_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -14096,17 +14096,17 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_seq_cst_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_one_as_seq_cst_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_one_as_seq_cst_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -14317,20 +14317,20 @@ define amdgpu_kernel void @local_wavefront_one_as_monotonic_monotonic_ret_cmpxch
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_one_as_monotonic_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_one_as_monotonic_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -14543,20 +14543,20 @@ define amdgpu_kernel void @local_wavefront_one_as_acquire_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_one_as_acquire_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_one_as_acquire_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -14769,20 +14769,20 @@ define amdgpu_kernel void @local_wavefront_one_as_release_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_one_as_release_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_one_as_release_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -14995,20 +14995,20 @@ define amdgpu_kernel void @local_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -15221,20 +15221,20 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -15447,20 +15447,20 @@ define amdgpu_kernel void @local_wavefront_one_as_monotonic_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_one_as_monotonic_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_one_as_monotonic_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -15673,20 +15673,20 @@ define amdgpu_kernel void @local_wavefront_one_as_acquire_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_one_as_acquire_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_one_as_acquire_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -15899,20 +15899,20 @@ define amdgpu_kernel void @local_wavefront_one_as_release_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_one_as_release_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_one_as_release_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -16125,20 +16125,20 @@ define amdgpu_kernel void @local_wavefront_one_as_acq_rel_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_one_as_acq_rel_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_one_as_acq_rel_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -16351,20 +16351,20 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_one_as_seq_cst_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_one_as_seq_cst_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -16577,20 +16577,20 @@ define amdgpu_kernel void @local_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -16803,20 +16803,20 @@ define amdgpu_kernel void @local_wavefront_one_as_acquire_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_one_as_acquire_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_one_as_acquire_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -17029,20 +17029,20 @@ define amdgpu_kernel void @local_wavefront_one_as_release_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_one_as_release_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_one_as_release_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -17255,20 +17255,20 @@ define amdgpu_kernel void @local_wavefront_one_as_acq_rel_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_one_as_acq_rel_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_one_as_acq_rel_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -17481,20 +17481,20 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-workgroup.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-workgroup.ll
index 94f5aab1eb67d..6aaf9d323b1fd 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-workgroup.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-workgroup.ll
@@ -12,7 +12,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250-CU %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250 %s
define amdgpu_kernel void @local_workgroup_unordered_load(
; GFX6-LABEL: local_workgroup_unordered_load:
@@ -179,17 +179,17 @@ define amdgpu_kernel void @local_workgroup_unordered_load(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_unordered_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: ds_load_b32 v1, v0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_unordered_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: ds_load_b32 v1, v0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
%val = load atomic i32, ptr addrspace(3) %in syncscope("workgroup") unordered, align 4
@@ -362,17 +362,17 @@ define amdgpu_kernel void @local_workgroup_monotonic_load(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_monotonic_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: ds_load_b32 v1, v0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_monotonic_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: ds_load_b32 v1, v0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
%val = load atomic i32, ptr addrspace(3) %in syncscope("workgroup") monotonic, align 4
@@ -550,17 +550,17 @@ define amdgpu_kernel void @local_workgroup_acquire_load(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_acquire_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: ds_load_b32 v1, v0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_acquire_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: ds_load_b32 v1, v0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
%val = load atomic i32, ptr addrspace(3) %in syncscope("workgroup") acquire, align 4
@@ -756,18 +756,18 @@ define amdgpu_kernel void @local_workgroup_seq_cst_load(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_seq_cst_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_load_b32 v1, v0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_seq_cst_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_load_b32 v1, v0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
%val = load atomic i32, ptr addrspace(3) %in syncscope("workgroup") seq_cst, align 4
@@ -910,15 +910,15 @@ define amdgpu_kernel void @local_workgroup_unordered_store(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_unordered_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_unordered_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic i32 %in, ptr addrspace(3) %out syncscope("workgroup") unordered, align 4
@@ -1060,15 +1060,15 @@ define amdgpu_kernel void @local_workgroup_monotonic_store(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_monotonic_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_monotonic_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic i32 %in, ptr addrspace(3) %out syncscope("workgroup") monotonic, align 4
@@ -1228,16 +1228,16 @@ define amdgpu_kernel void @local_workgroup_release_store(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_release_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_release_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic i32 %in, ptr addrspace(3) %out syncscope("workgroup") release, align 4
@@ -1397,16 +1397,16 @@ define amdgpu_kernel void @local_workgroup_seq_cst_store(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_seq_cst_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_seq_cst_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic i32 %in, ptr addrspace(3) %out syncscope("workgroup") seq_cst, align 4
@@ -1548,15 +1548,15 @@ define amdgpu_kernel void @local_workgroup_monotonic_atomicrmw(
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_monotonic_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_monotonic_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("workgroup") monotonic
@@ -1714,16 +1714,16 @@ define amdgpu_kernel void @local_workgroup_acquire_atomicrmw(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_acquire_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_acquire_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("workgroup") acquire
@@ -1883,16 +1883,16 @@ define amdgpu_kernel void @local_workgroup_release_atomicrmw(
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_release_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_release_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("workgroup") release
@@ -2068,17 +2068,17 @@ define amdgpu_kernel void @local_workgroup_acq_rel_atomicrmw(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_acq_rel_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_acq_rel_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("workgroup") acq_rel
@@ -2254,17 +2254,17 @@ define amdgpu_kernel void @local_workgroup_seq_cst_atomicrmw(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_seq_cst_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_seq_cst_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("workgroup") seq_cst
@@ -2453,18 +2453,18 @@ define amdgpu_kernel void @local_workgroup_acquire_ret_atomicrmw(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_acquire_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_acquire_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("workgroup") acquire
@@ -2672,19 +2672,19 @@ define amdgpu_kernel void @local_workgroup_acq_rel_ret_atomicrmw(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_acq_rel_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_acq_rel_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("workgroup") acq_rel
@@ -2892,19 +2892,19 @@ define amdgpu_kernel void @local_workgroup_seq_cst_ret_atomicrmw(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_seq_cst_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_seq_cst_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("workgroup") seq_cst
@@ -3073,17 +3073,17 @@ define amdgpu_kernel void @local_workgroup_monotonic_monotonic_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_monotonic_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_monotonic_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -3268,18 +3268,18 @@ define amdgpu_kernel void @local_workgroup_acquire_monotonic_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_acquire_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_acquire_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -3466,18 +3466,18 @@ define amdgpu_kernel void @local_workgroup_release_monotonic_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_release_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_release_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -3680,19 +3680,19 @@ define amdgpu_kernel void @local_workgroup_acq_rel_monotonic_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_acq_rel_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_acq_rel_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -3895,19 +3895,19 @@ define amdgpu_kernel void @local_workgroup_seq_cst_monotonic_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_seq_cst_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_seq_cst_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -4092,18 +4092,18 @@ define amdgpu_kernel void @local_workgroup_monotonic_acquire_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_monotonic_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_monotonic_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -4288,18 +4288,18 @@ define amdgpu_kernel void @local_workgroup_acquire_acquire_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_acquire_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_acquire_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -4502,19 +4502,19 @@ define amdgpu_kernel void @local_workgroup_release_acquire_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_release_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_release_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -4717,19 +4717,19 @@ define amdgpu_kernel void @local_workgroup_acq_rel_acquire_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_acq_rel_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_acq_rel_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -4932,19 +4932,19 @@ define amdgpu_kernel void @local_workgroup_seq_cst_acquire_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_seq_cst_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_seq_cst_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -5147,19 +5147,19 @@ define amdgpu_kernel void @local_workgroup_monotonic_seq_cst_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_monotonic_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_monotonic_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -5362,19 +5362,19 @@ define amdgpu_kernel void @local_workgroup_acquire_seq_cst_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_acquire_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_acquire_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -5577,19 +5577,19 @@ define amdgpu_kernel void @local_workgroup_release_seq_cst_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_release_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_release_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -5792,19 +5792,19 @@ define amdgpu_kernel void @local_workgroup_acq_rel_seq_cst_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_acq_rel_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_acq_rel_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -6007,19 +6007,19 @@ define amdgpu_kernel void @local_workgroup_seq_cst_seq_cst_cmpxchg(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_seq_cst_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_seq_cst_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -6230,20 +6230,20 @@ define amdgpu_kernel void @local_workgroup_monotonic_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_monotonic_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_monotonic_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -6461,20 +6461,20 @@ define amdgpu_kernel void @local_workgroup_acquire_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_acquire_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_acquire_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -6705,21 +6705,21 @@ define amdgpu_kernel void @local_workgroup_release_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_release_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_release_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -6955,21 +6955,21 @@ define amdgpu_kernel void @local_workgroup_acq_rel_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_acq_rel_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_acq_rel_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -7205,21 +7205,21 @@ define amdgpu_kernel void @local_workgroup_seq_cst_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_seq_cst_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_seq_cst_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -7437,20 +7437,20 @@ define amdgpu_kernel void @local_workgroup_monotonic_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_monotonic_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_monotonic_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -7668,20 +7668,20 @@ define amdgpu_kernel void @local_workgroup_acquire_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_acquire_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_acquire_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -7917,21 +7917,21 @@ define amdgpu_kernel void @local_workgroup_release_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_release_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_release_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -8167,21 +8167,21 @@ define amdgpu_kernel void @local_workgroup_acq_rel_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_acq_rel_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_acq_rel_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -8417,21 +8417,21 @@ define amdgpu_kernel void @local_workgroup_seq_cst_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_seq_cst_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_seq_cst_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -8667,21 +8667,21 @@ define amdgpu_kernel void @local_workgroup_monotonic_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_monotonic_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_monotonic_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -8917,21 +8917,21 @@ define amdgpu_kernel void @local_workgroup_acquire_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_acquire_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_acquire_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -9167,21 +9167,21 @@ define amdgpu_kernel void @local_workgroup_release_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_release_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_release_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -9417,21 +9417,21 @@ define amdgpu_kernel void @local_workgroup_acq_rel_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_acq_rel_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_acq_rel_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -9667,21 +9667,21 @@ define amdgpu_kernel void @local_workgroup_seq_cst_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_seq_cst_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_seq_cst_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -9856,17 +9856,17 @@ define amdgpu_kernel void @local_workgroup_one_as_unordered_load(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_one_as_unordered_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: ds_load_b32 v1, v0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_one_as_unordered_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: ds_load_b32 v1, v0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
%val = load atomic i32, ptr addrspace(3) %in syncscope("workgroup-one-as") unordered, align 4
@@ -10039,17 +10039,17 @@ define amdgpu_kernel void @local_workgroup_one_as_monotonic_load(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_one_as_monotonic_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: ds_load_b32 v1, v0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_one_as_monotonic_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: ds_load_b32 v1, v0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
%val = load atomic i32, ptr addrspace(3) %in syncscope("workgroup-one-as") monotonic, align 4
@@ -10222,17 +10222,17 @@ define amdgpu_kernel void @local_workgroup_one_as_acquire_load(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_one_as_acquire_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: ds_load_b32 v1, v0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_one_as_acquire_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: ds_load_b32 v1, v0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
%val = load atomic i32, ptr addrspace(3) %in syncscope("workgroup-one-as") acquire, align 4
@@ -10405,17 +10405,17 @@ define amdgpu_kernel void @local_workgroup_one_as_seq_cst_load(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_one_as_seq_cst_load:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: ds_load_b32 v1, v0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_one_as_seq_cst_load:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: ds_load_b32 v1, v0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
%val = load atomic i32, ptr addrspace(3) %in syncscope("workgroup-one-as") seq_cst, align 4
@@ -10558,15 +10558,15 @@ define amdgpu_kernel void @local_workgroup_one_as_unordered_store(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_one_as_unordered_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_one_as_unordered_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic i32 %in, ptr addrspace(3) %out syncscope("workgroup-one-as") unordered, align 4
@@ -10708,15 +10708,15 @@ define amdgpu_kernel void @local_workgroup_one_as_monotonic_store(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_one_as_monotonic_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_one_as_monotonic_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic i32 %in, ptr addrspace(3) %out syncscope("workgroup-one-as") monotonic, align 4
@@ -10858,15 +10858,15 @@ define amdgpu_kernel void @local_workgroup_one_as_release_store(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_one_as_release_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_one_as_release_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic i32 %in, ptr addrspace(3) %out syncscope("workgroup-one-as") release, align 4
@@ -11008,15 +11008,15 @@ define amdgpu_kernel void @local_workgroup_one_as_seq_cst_store(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_one_as_seq_cst_store:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_one_as_seq_cst_store:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic i32 %in, ptr addrspace(3) %out syncscope("workgroup-one-as") seq_cst, align 4
@@ -11158,15 +11158,15 @@ define amdgpu_kernel void @local_workgroup_one_as_monotonic_atomicrmw(
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_one_as_monotonic_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_one_as_monotonic_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("workgroup-one-as") monotonic
@@ -11308,15 +11308,15 @@ define amdgpu_kernel void @local_workgroup_one_as_acquire_atomicrmw(
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_one_as_acquire_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_one_as_acquire_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("workgroup-one-as") acquire
@@ -11458,15 +11458,15 @@ define amdgpu_kernel void @local_workgroup_one_as_release_atomicrmw(
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_one_as_release_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_one_as_release_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("workgroup-one-as") release
@@ -11608,15 +11608,15 @@ define amdgpu_kernel void @local_workgroup_one_as_acq_rel_atomicrmw(
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_one_as_acq_rel_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_one_as_acq_rel_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("workgroup-one-as") acq_rel
@@ -11758,15 +11758,15 @@ define amdgpu_kernel void @local_workgroup_one_as_seq_cst_atomicrmw(
; GFX12-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_one_as_seq_cst_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_one_as_seq_cst_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s1
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("workgroup-one-as") seq_cst
@@ -11950,18 +11950,18 @@ define amdgpu_kernel void @local_workgroup_one_as_acquire_ret_atomicrmw(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_one_as_acquire_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_one_as_acquire_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("workgroup-one-as") acquire
@@ -12146,18 +12146,18 @@ define amdgpu_kernel void @local_workgroup_one_as_acq_rel_ret_atomicrmw(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_one_as_acq_rel_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_one_as_acq_rel_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("workgroup-one-as") acq_rel
@@ -12342,18 +12342,18 @@ define amdgpu_kernel void @local_workgroup_one_as_seq_cst_ret_atomicrmw(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_one_as_seq_cst_ret_atomicrmw:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_one_as_seq_cst_ret_atomicrmw:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg ptr addrspace(3) %out, i32 %in syncscope("workgroup-one-as") seq_cst
@@ -12522,17 +12522,17 @@ define amdgpu_kernel void @local_workgroup_one_as_monotonic_monotonic_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_one_as_monotonic_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_one_as_monotonic_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -12701,17 +12701,17 @@ define amdgpu_kernel void @local_workgroup_one_as_acquire_monotonic_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_one_as_acquire_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_one_as_acquire_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -12880,17 +12880,17 @@ define amdgpu_kernel void @local_workgroup_one_as_release_monotonic_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_one_as_release_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_one_as_release_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -13059,17 +13059,17 @@ define amdgpu_kernel void @local_workgroup_one_as_acq_rel_monotonic_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_one_as_acq_rel_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_one_as_acq_rel_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -13238,17 +13238,17 @@ define amdgpu_kernel void @local_workgroup_one_as_seq_cst_monotonic_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_one_as_seq_cst_monotonic_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_one_as_seq_cst_monotonic_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -13417,17 +13417,17 @@ define amdgpu_kernel void @local_workgroup_one_as_monotonic_acquire_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_one_as_monotonic_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_one_as_monotonic_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -13596,17 +13596,17 @@ define amdgpu_kernel void @local_workgroup_one_as_acquire_acquire_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_one_as_acquire_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_one_as_acquire_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -13775,17 +13775,17 @@ define amdgpu_kernel void @local_workgroup_one_as_release_acquire_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_one_as_release_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_one_as_release_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -13954,17 +13954,17 @@ define amdgpu_kernel void @local_workgroup_one_as_acq_rel_acquire_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_one_as_acq_rel_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_one_as_acq_rel_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -14133,17 +14133,17 @@ define amdgpu_kernel void @local_workgroup_one_as_seq_cst_acquire_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_one_as_seq_cst_acquire_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_one_as_seq_cst_acquire_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -14312,17 +14312,17 @@ define amdgpu_kernel void @local_workgroup_one_as_monotonic_seq_cst_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_one_as_monotonic_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_one_as_monotonic_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -14491,17 +14491,17 @@ define amdgpu_kernel void @local_workgroup_one_as_acquire_seq_cst_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_one_as_acquire_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_one_as_acquire_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -14670,17 +14670,17 @@ define amdgpu_kernel void @local_workgroup_one_as_release_seq_cst_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_one_as_release_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_one_as_release_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -14849,17 +14849,17 @@ define amdgpu_kernel void @local_workgroup_one_as_acq_rel_seq_cst_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_one_as_acq_rel_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_one_as_acq_rel_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -15028,17 +15028,17 @@ define amdgpu_kernel void @local_workgroup_one_as_seq_cst_seq_cst_cmpxchg(
; GFX12-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_one_as_seq_cst_seq_cst_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-CU-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_one_as_seq_cst_seq_cst_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s1
+; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -15249,20 +15249,20 @@ define amdgpu_kernel void @local_workgroup_one_as_monotonic_monotonic_ret_cmpxch
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_one_as_monotonic_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_one_as_monotonic_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -15475,20 +15475,20 @@ define amdgpu_kernel void @local_workgroup_one_as_acquire_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_one_as_acquire_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_one_as_acquire_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -15701,20 +15701,20 @@ define amdgpu_kernel void @local_workgroup_one_as_release_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_one_as_release_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_one_as_release_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -15927,20 +15927,20 @@ define amdgpu_kernel void @local_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -16153,20 +16153,20 @@ define amdgpu_kernel void @local_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -16379,20 +16379,20 @@ define amdgpu_kernel void @local_workgroup_one_as_monotonic_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_one_as_monotonic_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_one_as_monotonic_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -16605,20 +16605,20 @@ define amdgpu_kernel void @local_workgroup_one_as_acquire_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_one_as_acquire_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_one_as_acquire_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -16831,20 +16831,20 @@ define amdgpu_kernel void @local_workgroup_one_as_release_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_one_as_release_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_one_as_release_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -17057,20 +17057,20 @@ define amdgpu_kernel void @local_workgroup_one_as_acq_rel_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_one_as_acq_rel_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_one_as_acq_rel_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -17283,20 +17283,20 @@ define amdgpu_kernel void @local_workgroup_one_as_seq_cst_acquire_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_one_as_seq_cst_acquire_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_one_as_seq_cst_acquire_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -17509,20 +17509,20 @@ define amdgpu_kernel void @local_workgroup_one_as_monotonic_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_one_as_monotonic_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_one_as_monotonic_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -17735,20 +17735,20 @@ define amdgpu_kernel void @local_workgroup_one_as_acquire_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_one_as_acquire_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_one_as_acquire_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -17961,20 +17961,20 @@ define amdgpu_kernel void @local_workgroup_one_as_release_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_one_as_release_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_one_as_release_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -18187,20 +18187,20 @@ define amdgpu_kernel void @local_workgroup_one_as_acq_rel_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_one_as_acq_rel_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_one_as_acq_rel_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
@@ -18413,20 +18413,20 @@ define amdgpu_kernel void @local_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
-; GFX1250-CU-LABEL: local_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg:
-; GFX1250-CU: ; %bb.0: ; %entry
-; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
-; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x4
-; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
-; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-CU-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-CU-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
-; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s0
-; GFX1250-CU-NEXT: s_wait_dscnt 0x0
-; GFX1250-CU-NEXT: ds_store_b32 v0, v1
-; GFX1250-CU-NEXT: s_endpgm
+; GFX1250-LABEL: local_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg:
+; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v0, s0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, ptr addrspace(3) %out, i32 4
More information about the llvm-commits
mailing list