[llvm] [AMDGPU] Add legalization rules for atomic ops (PR #175253)
Anshil Gandhi via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 14 11:34:52 PST 2026
https://github.com/gandhi56 updated https://github.com/llvm/llvm-project/pull/175253
>From 5a0c926fadea1adb161cdc44ea87502f6592b5be Mon Sep 17 00:00:00 2001
From: Anshil Gandhi <Anshil.Gandhi at amd.com>
Date: Sun, 14 Dec 2025 14:38:28 -0600
Subject: [PATCH] [AMDGPU] Add legalization rules for atomic ops
Implement rules for G_ATOMICRMW_{ADD, SUB, XCHG, AND, OR, XOR}.
---
.../AMDGPU/AMDGPURegBankLegalizeRules.cpp | 11 +
.../AMDGPU/GlobalISel/atomicrmw-add-sub.ll | 300 ++++++++++++
.../AMDGPU/GlobalISel/atomicrmw-and.ll | 122 +++++
.../CodeGen/AMDGPU/GlobalISel/atomicrmw-or.ll | 122 +++++
.../AMDGPU/GlobalISel/atomicrmw-xchg.ll | 252 ++++++++++
.../AMDGPU/GlobalISel/atomicrmw-xor.ll | 122 +++++
.../regbankselect-atomicrmw-add-sub.mir | 461 ++++++++++++++++++
.../regbankselect-atomicrmw-add.mir | 63 ++-
.../regbankselect-atomicrmw-and.mir | 171 ++++++-
.../GlobalISel/regbankselect-atomicrmw-or.mir | 171 ++++++-
.../regbankselect-atomicrmw-sub.mir | 171 ++++++-
.../regbankselect-atomicrmw-xchg.mir | 3 +-
.../regbankselect-atomicrmw-xor.mir | 171 ++++++-
13 files changed, 2128 insertions(+), 12 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw-add-sub.ll
create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw-and.ll
create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw-or.ll
create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw-xchg.ll
create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw-xor.ll
create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-add-sub.mir
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
index 01ccdf84f5303..4f4ad000166d0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
@@ -664,6 +664,17 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
.Uni(S64, {{Sgpr64}, {Sgpr64, Imm}})
.Div(S64, {{Vgpr64}, {Vgpr64, Imm}});
+ // Atomic read-modify-write operations: result, pointer and value are always
+ // VGPR; only the pointer type differs per address space (P0, P1, P3).
+ addRulesForGOpcs({G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, G_ATOMICRMW_XCHG,
+ G_ATOMICRMW_AND, G_ATOMICRMW_OR, G_ATOMICRMW_XOR})
+ .Any({{S32, P0}, {{Vgpr32}, {VgprP0, Vgpr32}}})
+ .Any({{S64, P0}, {{Vgpr64}, {VgprP0, Vgpr64}}})
+ .Any({{S32, P1}, {{Vgpr32}, {VgprP1, Vgpr32}}})
+ .Any({{S64, P1}, {{Vgpr64}, {VgprP1, Vgpr64}}})
+ .Any({{S32, P3}, {{Vgpr32}, {VgprP3, Vgpr32}}})
+ .Any({{S64, P3}, {{Vgpr64}, {VgprP3, Vgpr64}}});
+
bool hasSMRDx3 = ST->hasScalarDwordx3Loads();
bool hasSMRDSmall = ST->hasScalarSubwordLoads();
bool usesTrue16 = ST->useRealTrue16Insts();
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw-add-sub.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw-add-sub.ll
new file mode 100644
index 0000000000000..f0dcf0a11ec0c
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw-add-sub.ll
@@ -0,0 +1,300 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12 %s
+
+; Test atomicrmw add and sub operations for different address spaces
+
+; =============================================================================
+; atomicrmw add - global address space (addrspace 1)
+; =============================================================================
+
+define i32 @atomicrmw_add_i32_global(ptr addrspace(1) %ptr, i32 %val) {
+; GFX12-LABEL: atomicrmw_add_i32_global:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: global_wb scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_storecnt 0x0
+; GFX12-NEXT: global_atomic_add_u32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_loadcnt 0x0
+; GFX12-NEXT: global_inv scope:SCOPE_SYS
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %result = atomicrmw add ptr addrspace(1) %ptr, i32 %val seq_cst
+ ret i32 %result
+}
+
+define i64 @atomicrmw_add_i64_global(ptr addrspace(1) %ptr, i64 %val) {
+; GFX12-LABEL: atomicrmw_add_i64_global:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: global_wb scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_storecnt 0x0
+; GFX12-NEXT: global_atomic_add_u64 v[0:1], v[0:1], v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_loadcnt 0x0
+; GFX12-NEXT: global_inv scope:SCOPE_SYS
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %result = atomicrmw add ptr addrspace(1) %ptr, i64 %val seq_cst
+ ret i64 %result
+}
+
+; =============================================================================
+; atomicrmw sub - global address space: covered by the metadata tests below
+; =============================================================================
+; atomicrmw add - local address space (addrspace 3)
+; =============================================================================
+
+define i32 @atomicrmw_add_i32_local(ptr addrspace(3) %ptr, i32 %val) {
+; GFX12-LABEL: atomicrmw_add_i32_local:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: s_wait_storecnt 0x0
+; GFX12-NEXT: ds_add_rtn_u32 v0, v0, v1
+; GFX12-NEXT: s_wait_dscnt 0x0
+; GFX12-NEXT: global_inv scope:SCOPE_SE
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %result = atomicrmw add ptr addrspace(3) %ptr, i32 %val seq_cst
+ ret i32 %result
+}
+
+define i64 @atomicrmw_add_i64_local(ptr addrspace(3) %ptr, i64 %val) {
+; GFX12-LABEL: atomicrmw_add_i64_local:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: s_wait_storecnt 0x0
+; GFX12-NEXT: ds_add_rtn_u64 v[0:1], v0, v[1:2]
+; GFX12-NEXT: s_wait_dscnt 0x0
+; GFX12-NEXT: global_inv scope:SCOPE_SE
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %result = atomicrmw add ptr addrspace(3) %ptr, i64 %val seq_cst
+ ret i64 %result
+}
+
+; =============================================================================
+; atomicrmw sub - local address space (addrspace 3)
+; =============================================================================
+
+define i32 @atomicrmw_sub_i32_local(ptr addrspace(3) %ptr, i32 %val) {
+; GFX12-LABEL: atomicrmw_sub_i32_local:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: s_wait_storecnt 0x0
+; GFX12-NEXT: ds_sub_rtn_u32 v0, v0, v1
+; GFX12-NEXT: s_wait_dscnt 0x0
+; GFX12-NEXT: global_inv scope:SCOPE_SE
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %result = atomicrmw sub ptr addrspace(3) %ptr, i32 %val seq_cst
+ ret i32 %result
+}
+
+define i64 @atomicrmw_sub_i64_local(ptr addrspace(3) %ptr, i64 %val) {
+; GFX12-LABEL: atomicrmw_sub_i64_local:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: s_wait_storecnt 0x0
+; GFX12-NEXT: ds_sub_rtn_u64 v[0:1], v0, v[1:2]
+; GFX12-NEXT: s_wait_dscnt 0x0
+; GFX12-NEXT: global_inv scope:SCOPE_SE
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %result = atomicrmw sub ptr addrspace(3) %ptr, i64 %val seq_cst
+ ret i64 %result
+}
+
+; =============================================================================
+; atomicrmw add - flat address space (addrspace 0)
+; =============================================================================
+
+define i32 @atomicrmw_add_i32_flat(ptr %ptr, i32 %val) {
+; GFX12-LABEL: atomicrmw_add_i32_flat:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: global_wb scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_storecnt 0x0
+; GFX12-NEXT: flat_atomic_add_u32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: global_inv scope:SCOPE_SYS
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %result = atomicrmw add ptr %ptr, i32 %val seq_cst
+ ret i32 %result
+}
+
+define i64 @atomicrmw_add_i64_flat(ptr %ptr, i64 %val) {
+; GFX12-LABEL: atomicrmw_add_i64_flat:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_dual_mov_b32 v4, v0 :: v_dual_mov_b32 v5, v1
+; GFX12-NEXT: s_mov_b64 s[0:1], src_private_base
+; GFX12-NEXT: s_mov_b32 s0, exec_lo
+; GFX12-NEXT: ; implicit-def: $vgpr0_vgpr1
+; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-NEXT: v_cmpx_ne_u32_e64 s1, v5
+; GFX12-NEXT: s_xor_b32 s0, exec_lo, s0
+; GFX12-NEXT: s_cbranch_execnz .LBB9_3
+; GFX12-NEXT: ; %bb.1: ; %Flow
+; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
+; GFX12-NEXT: s_and_not1_saveexec_b32 s0, s0
+; GFX12-NEXT: s_cbranch_execnz .LBB9_4
+; GFX12-NEXT: .LBB9_2: ; %atomicrmw.phi
+; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
+; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+; GFX12-NEXT: .LBB9_3: ; %atomicrmw.global
+; GFX12-NEXT: global_wb scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_storecnt 0x0
+; GFX12-NEXT: flat_atomic_add_u64 v[0:1], v[4:5], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: global_inv scope:SCOPE_SYS
+; GFX12-NEXT: ; implicit-def: $vgpr4_vgpr5
+; GFX12-NEXT: ; implicit-def: $vgpr2
+; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
+; GFX12-NEXT: s_and_not1_saveexec_b32 s0, s0
+; GFX12-NEXT: s_cbranch_execz .LBB9_2
+; GFX12-NEXT: .LBB9_4: ; %atomicrmw.private
+; GFX12-NEXT: v_cmp_ne_u64_e32 vcc_lo, 0, v[4:5]
+; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-NEXT: v_cndmask_b32_e32 v4, -1, v4, vcc_lo
+; GFX12-NEXT: scratch_load_b64 v[0:1], v4, off
+; GFX12-NEXT: s_wait_loadcnt 0x0
+; GFX12-NEXT: v_add_co_u32 v2, vcc_lo, v0, v2
+; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-NEXT: v_add_co_ci_u32_e64 v3, null, v1, v3, vcc_lo
+; GFX12-NEXT: scratch_store_b64 v4, v[2:3], off
+; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
+; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %result = atomicrmw add ptr %ptr, i64 %val seq_cst
+ ret i64 %result
+}
+
+; =============================================================================
+; atomicrmw add - VGPR inputs (loaded from memory)
+; =============================================================================
+
+define i32 @atomicrmw_add_i32_global_vgpr(ptr addrspace(1) %ptr, ptr addrspace(1) %val_ptr) {
+; GFX12-LABEL: atomicrmw_add_i32_global_vgpr:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: global_load_b32 v2, v[2:3], off
+; GFX12-NEXT: global_wb scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_loadcnt 0x0
+; GFX12-NEXT: s_wait_storecnt 0x0
+; GFX12-NEXT: global_atomic_add_u32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_loadcnt 0x0
+; GFX12-NEXT: global_inv scope:SCOPE_SYS
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %val = load i32, ptr addrspace(1) %val_ptr
+ %result = atomicrmw add ptr addrspace(1) %ptr, i32 %val seq_cst
+ ret i32 %result
+}
+
+; =============================================================================
+; atomicrmw sub with metadata - global address space (no expansion)
+; =============================================================================
+
+define i32 @atomicrmw_sub_i32_global_no_remote_memory(ptr addrspace(1) %ptr, i32 %val) {
+; GFX12-LABEL: atomicrmw_sub_i32_global_no_remote_memory:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: global_wb scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_storecnt 0x0
+; GFX12-NEXT: global_atomic_sub_u32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_loadcnt 0x0
+; GFX12-NEXT: global_inv scope:SCOPE_SYS
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %result = atomicrmw sub ptr addrspace(1) %ptr, i32 %val seq_cst, !amdgpu.no.remote.memory !0
+ ret i32 %result
+}
+
+define i64 @atomicrmw_sub_i64_global_no_remote_memory(ptr addrspace(1) %ptr, i64 %val) {
+; GFX12-LABEL: atomicrmw_sub_i64_global_no_remote_memory:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: global_wb scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_storecnt 0x0
+; GFX12-NEXT: global_atomic_sub_u64 v[0:1], v[0:1], v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_loadcnt 0x0
+; GFX12-NEXT: global_inv scope:SCOPE_SYS
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %result = atomicrmw sub ptr addrspace(1) %ptr, i64 %val seq_cst, !amdgpu.no.remote.memory !0
+ ret i64 %result
+}
+
+define i32 @atomicrmw_sub_i32_global_no_fine_grained_memory(ptr addrspace(1) %ptr, i32 %val) {
+; GFX12-LABEL: atomicrmw_sub_i32_global_no_fine_grained_memory:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: global_wb scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_storecnt 0x0
+; GFX12-NEXT: global_atomic_sub_u32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_loadcnt 0x0
+; GFX12-NEXT: global_inv scope:SCOPE_SYS
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %result = atomicrmw sub ptr addrspace(1) %ptr, i32 %val seq_cst, !amdgpu.no.fine.grained.memory !0
+ ret i32 %result
+}
+
+define i64 @atomicrmw_sub_i64_global_no_fine_grained_memory(ptr addrspace(1) %ptr, i64 %val) {
+; GFX12-LABEL: atomicrmw_sub_i64_global_no_fine_grained_memory:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: global_wb scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_storecnt 0x0
+; GFX12-NEXT: global_atomic_sub_u64 v[0:1], v[0:1], v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_loadcnt 0x0
+; GFX12-NEXT: global_inv scope:SCOPE_SYS
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %result = atomicrmw sub ptr addrspace(1) %ptr, i64 %val seq_cst, !amdgpu.no.fine.grained.memory !0
+ ret i64 %result
+}
+
+!0 = !{}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw-and.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw-and.ll
new file mode 100644
index 0000000000000..51e723df8391c
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw-and.ll
@@ -0,0 +1,122 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12 %s
+
+; Test atomicrmw and operations for different address spaces
+
+; =============================================================================
+; atomicrmw and - global address space: covered by the metadata tests below
+; =============================================================================
+; atomicrmw and - local address space (addrspace 3)
+; =============================================================================
+
+define i32 @atomicrmw_and_i32_local(ptr addrspace(3) %ptr, i32 %val) {
+; GFX12-LABEL: atomicrmw_and_i32_local:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: s_wait_storecnt 0x0
+; GFX12-NEXT: ds_and_rtn_b32 v0, v0, v1
+; GFX12-NEXT: s_wait_dscnt 0x0
+; GFX12-NEXT: global_inv scope:SCOPE_SE
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %result = atomicrmw and ptr addrspace(3) %ptr, i32 %val seq_cst
+ ret i32 %result
+}
+
+define i64 @atomicrmw_and_i64_local(ptr addrspace(3) %ptr, i64 %val) {
+; GFX12-LABEL: atomicrmw_and_i64_local:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: s_wait_storecnt 0x0
+; GFX12-NEXT: ds_and_rtn_b64 v[0:1], v0, v[1:2]
+; GFX12-NEXT: s_wait_dscnt 0x0
+; GFX12-NEXT: global_inv scope:SCOPE_SE
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %result = atomicrmw and ptr addrspace(3) %ptr, i64 %val seq_cst
+ ret i64 %result
+}
+
+; =============================================================================
+; atomicrmw and with metadata - global address space (no expansion)
+; =============================================================================
+
+define i32 @atomicrmw_and_i32_global_no_remote_memory(ptr addrspace(1) %ptr, i32 %val) {
+; GFX12-LABEL: atomicrmw_and_i32_global_no_remote_memory:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: global_wb scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_storecnt 0x0
+; GFX12-NEXT: global_atomic_and_b32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_loadcnt 0x0
+; GFX12-NEXT: global_inv scope:SCOPE_SYS
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %result = atomicrmw and ptr addrspace(1) %ptr, i32 %val seq_cst, !amdgpu.no.remote.memory !0
+ ret i32 %result
+}
+
+define i64 @atomicrmw_and_i64_global_no_remote_memory(ptr addrspace(1) %ptr, i64 %val) {
+; GFX12-LABEL: atomicrmw_and_i64_global_no_remote_memory:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: global_wb scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_storecnt 0x0
+; GFX12-NEXT: global_atomic_and_b64 v[0:1], v[0:1], v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_loadcnt 0x0
+; GFX12-NEXT: global_inv scope:SCOPE_SYS
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %result = atomicrmw and ptr addrspace(1) %ptr, i64 %val seq_cst, !amdgpu.no.remote.memory !0
+ ret i64 %result
+}
+
+define i32 @atomicrmw_and_i32_global_no_fine_grained_memory(ptr addrspace(1) %ptr, i32 %val) {
+; GFX12-LABEL: atomicrmw_and_i32_global_no_fine_grained_memory:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: global_wb scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_storecnt 0x0
+; GFX12-NEXT: global_atomic_and_b32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_loadcnt 0x0
+; GFX12-NEXT: global_inv scope:SCOPE_SYS
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %result = atomicrmw and ptr addrspace(1) %ptr, i32 %val seq_cst, !amdgpu.no.fine.grained.memory !0
+ ret i32 %result
+}
+
+define i64 @atomicrmw_and_i64_global_no_fine_grained_memory(ptr addrspace(1) %ptr, i64 %val) {
+; GFX12-LABEL: atomicrmw_and_i64_global_no_fine_grained_memory:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: global_wb scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_storecnt 0x0
+; GFX12-NEXT: global_atomic_and_b64 v[0:1], v[0:1], v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_loadcnt 0x0
+; GFX12-NEXT: global_inv scope:SCOPE_SYS
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %result = atomicrmw and ptr addrspace(1) %ptr, i64 %val seq_cst, !amdgpu.no.fine.grained.memory !0
+ ret i64 %result
+}
+
+!0 = !{}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw-or.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw-or.ll
new file mode 100644
index 0000000000000..de296448319b6
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw-or.ll
@@ -0,0 +1,122 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12 %s
+
+; Test atomicrmw or operations for different address spaces
+
+; =============================================================================
+; atomicrmw or - global address space: covered by the metadata tests below
+; =============================================================================
+; atomicrmw or - local address space (addrspace 3)
+; =============================================================================
+
+define i32 @atomicrmw_or_i32_local(ptr addrspace(3) %ptr, i32 %val) {
+; GFX12-LABEL: atomicrmw_or_i32_local:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: s_wait_storecnt 0x0
+; GFX12-NEXT: ds_or_rtn_b32 v0, v0, v1
+; GFX12-NEXT: s_wait_dscnt 0x0
+; GFX12-NEXT: global_inv scope:SCOPE_SE
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %result = atomicrmw or ptr addrspace(3) %ptr, i32 %val seq_cst
+ ret i32 %result
+}
+
+define i64 @atomicrmw_or_i64_local(ptr addrspace(3) %ptr, i64 %val) {
+; GFX12-LABEL: atomicrmw_or_i64_local:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: s_wait_storecnt 0x0
+; GFX12-NEXT: ds_or_rtn_b64 v[0:1], v0, v[1:2]
+; GFX12-NEXT: s_wait_dscnt 0x0
+; GFX12-NEXT: global_inv scope:SCOPE_SE
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %result = atomicrmw or ptr addrspace(3) %ptr, i64 %val seq_cst
+ ret i64 %result
+}
+
+; =============================================================================
+; atomicrmw or with metadata - global address space (no expansion)
+; =============================================================================
+
+define i32 @atomicrmw_or_i32_global_no_remote_memory(ptr addrspace(1) %ptr, i32 %val) {
+; GFX12-LABEL: atomicrmw_or_i32_global_no_remote_memory:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: global_wb scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_storecnt 0x0
+; GFX12-NEXT: global_atomic_or_b32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_loadcnt 0x0
+; GFX12-NEXT: global_inv scope:SCOPE_SYS
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %result = atomicrmw or ptr addrspace(1) %ptr, i32 %val seq_cst, !amdgpu.no.remote.memory !0
+ ret i32 %result
+}
+
+define i64 @atomicrmw_or_i64_global_no_remote_memory(ptr addrspace(1) %ptr, i64 %val) {
+; GFX12-LABEL: atomicrmw_or_i64_global_no_remote_memory:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: global_wb scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_storecnt 0x0
+; GFX12-NEXT: global_atomic_or_b64 v[0:1], v[0:1], v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_loadcnt 0x0
+; GFX12-NEXT: global_inv scope:SCOPE_SYS
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %result = atomicrmw or ptr addrspace(1) %ptr, i64 %val seq_cst, !amdgpu.no.remote.memory !0
+ ret i64 %result
+}
+
+define i32 @atomicrmw_or_i32_global_no_fine_grained_memory(ptr addrspace(1) %ptr, i32 %val) {
+; GFX12-LABEL: atomicrmw_or_i32_global_no_fine_grained_memory:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: global_wb scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_storecnt 0x0
+; GFX12-NEXT: global_atomic_or_b32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_loadcnt 0x0
+; GFX12-NEXT: global_inv scope:SCOPE_SYS
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %result = atomicrmw or ptr addrspace(1) %ptr, i32 %val seq_cst, !amdgpu.no.fine.grained.memory !0
+ ret i32 %result
+}
+
+define i64 @atomicrmw_or_i64_global_no_fine_grained_memory(ptr addrspace(1) %ptr, i64 %val) {
+; GFX12-LABEL: atomicrmw_or_i64_global_no_fine_grained_memory:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: global_wb scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_storecnt 0x0
+; GFX12-NEXT: global_atomic_or_b64 v[0:1], v[0:1], v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_loadcnt 0x0
+; GFX12-NEXT: global_inv scope:SCOPE_SYS
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %result = atomicrmw or ptr addrspace(1) %ptr, i64 %val seq_cst, !amdgpu.no.fine.grained.memory !0
+ ret i64 %result
+}
+
+!0 = !{}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw-xchg.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw-xchg.ll
new file mode 100644
index 0000000000000..bc9cdd08ff62f
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw-xchg.ll
@@ -0,0 +1,252 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12 %s
+
+; Test atomicrmw xchg operations for different address spaces
+
+; =============================================================================
+; atomicrmw xchg - global address space (addrspace 1)
+; =============================================================================
+
+define i32 @atomicrmw_xchg_i32_global(ptr addrspace(1) %ptr, i32 %val) {
+; GFX12-LABEL: atomicrmw_xchg_i32_global:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: global_wb scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_storecnt 0x0
+; GFX12-NEXT: global_atomic_swap_b32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_loadcnt 0x0
+; GFX12-NEXT: global_inv scope:SCOPE_SYS
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %result = atomicrmw xchg ptr addrspace(1) %ptr, i32 %val seq_cst
+ ret i32 %result
+}
+
+define i64 @atomicrmw_xchg_i64_global(ptr addrspace(1) %ptr, i64 %val) {
+; GFX12-LABEL: atomicrmw_xchg_i64_global:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: global_wb scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_storecnt 0x0
+; GFX12-NEXT: global_atomic_swap_b64 v[0:1], v[0:1], v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_loadcnt 0x0
+; GFX12-NEXT: global_inv scope:SCOPE_SYS
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %result = atomicrmw xchg ptr addrspace(1) %ptr, i64 %val seq_cst
+ ret i64 %result
+}
+
+; =============================================================================
+; atomicrmw xchg - local address space (addrspace 3)
+; =============================================================================
+
+define i32 @atomicrmw_xchg_i32_local(ptr addrspace(3) %ptr, i32 %val) {
+; GFX12-LABEL: atomicrmw_xchg_i32_local:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: s_wait_storecnt 0x0
+; GFX12-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
+; GFX12-NEXT: s_wait_dscnt 0x0
+; GFX12-NEXT: global_inv scope:SCOPE_SE
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %result = atomicrmw xchg ptr addrspace(3) %ptr, i32 %val seq_cst
+ ret i32 %result
+}
+
+define i64 @atomicrmw_xchg_i64_local(ptr addrspace(3) %ptr, i64 %val) {
+; GFX12-LABEL: atomicrmw_xchg_i64_local:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: s_wait_storecnt 0x0
+; GFX12-NEXT: ds_storexchg_rtn_b64 v[0:1], v0, v[1:2]
+; GFX12-NEXT: s_wait_dscnt 0x0
+; GFX12-NEXT: global_inv scope:SCOPE_SE
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %result = atomicrmw xchg ptr addrspace(3) %ptr, i64 %val seq_cst
+ ret i64 %result
+}
+
+; =============================================================================
+; atomicrmw xchg - flat address space (addrspace 0)
+; =============================================================================
+
+define i32 @atomicrmw_xchg_i32_flat(ptr %ptr, i32 %val) {
+; GFX12-LABEL: atomicrmw_xchg_i32_flat:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: global_wb scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_storecnt 0x0
+; GFX12-NEXT: flat_atomic_swap_b32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: global_inv scope:SCOPE_SYS
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %result = atomicrmw xchg ptr %ptr, i32 %val seq_cst
+ ret i32 %result
+}
+
+define i64 @atomicrmw_xchg_i64_flat(ptr %ptr, i64 %val) {
+; GFX12-LABEL: atomicrmw_xchg_i64_flat:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: s_mov_b64 s[0:1], src_private_base
+; GFX12-NEXT: s_mov_b32 s0, exec_lo
+; GFX12-NEXT: ; implicit-def: $vgpr4_vgpr5
+; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
+; GFX12-NEXT: v_cmpx_ne_u32_e64 s1, v1
+; GFX12-NEXT: s_xor_b32 s0, exec_lo, s0
+; GFX12-NEXT: s_cbranch_execz .LBB5_2
+; GFX12-NEXT: ; %bb.1: ; %atomicrmw.global
+; GFX12-NEXT: global_wb scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_storecnt 0x0
+; GFX12-NEXT: flat_atomic_swap_b64 v[4:5], v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: global_inv scope:SCOPE_SYS
+; GFX12-NEXT: ; implicit-def: $vgpr0_vgpr1
+; GFX12-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX12-NEXT: .LBB5_2: ; %Flow
+; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
+; GFX12-NEXT: s_and_not1_saveexec_b32 s0, s0
+; GFX12-NEXT: s_cbranch_execz .LBB5_4
+; GFX12-NEXT: ; %bb.3: ; %atomicrmw.private
+; GFX12-NEXT: v_cmp_ne_u64_e32 vcc_lo, 0, v[0:1]
+; GFX12-NEXT: s_wait_alu depctr_va_vcc(0)
+; GFX12-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc_lo
+; GFX12-NEXT: scratch_load_b64 v[4:5], v0, off
+; GFX12-NEXT: scratch_store_b64 v0, v[2:3], off
+; GFX12-NEXT: .LBB5_4: ; %atomicrmw.phi
+; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
+; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
+; GFX12-NEXT: s_wait_loadcnt 0x0
+; GFX12-NEXT: v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %result = atomicrmw xchg ptr %ptr, i64 %val seq_cst
+ ret i64 %result
+}
+
+; =============================================================================
+; atomicrmw xchg - VGPR inputs (loaded from memory)
+; =============================================================================
+
+define i32 @atomicrmw_xchg_i32_global_vgpr(ptr addrspace(1) %ptr, ptr addrspace(1) %val_ptr) {
+; GFX12-LABEL: atomicrmw_xchg_i32_global_vgpr:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: global_load_b32 v2, v[2:3], off
+; GFX12-NEXT: global_wb scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_loadcnt 0x0
+; GFX12-NEXT: s_wait_storecnt 0x0
+; GFX12-NEXT: global_atomic_swap_b32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_loadcnt 0x0
+; GFX12-NEXT: global_inv scope:SCOPE_SYS
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %val = load i32, ptr addrspace(1) %val_ptr
+ %result = atomicrmw xchg ptr addrspace(1) %ptr, i32 %val seq_cst
+ ret i32 %result
+}
+
+; =============================================================================
+; atomicrmw xchg with metadata - global address space
+; Note: xchg is natively supported and doesn't expand to CAS for i32/i64,
+; but we still test metadata for consistency and documentation purposes
+; =============================================================================
+
+define i32 @atomicrmw_xchg_i32_global_no_remote_memory(ptr addrspace(1) %ptr, i32 %val) {
+; GFX12-LABEL: atomicrmw_xchg_i32_global_no_remote_memory:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: global_wb scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_storecnt 0x0
+; GFX12-NEXT: global_atomic_swap_b32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_loadcnt 0x0
+; GFX12-NEXT: global_inv scope:SCOPE_SYS
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %result = atomicrmw xchg ptr addrspace(1) %ptr, i32 %val seq_cst, !amdgpu.no.remote.memory !0
+ ret i32 %result
+}
+
+define i64 @atomicrmw_xchg_i64_global_no_remote_memory(ptr addrspace(1) %ptr, i64 %val) {
+; GFX12-LABEL: atomicrmw_xchg_i64_global_no_remote_memory:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: global_wb scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_storecnt 0x0
+; GFX12-NEXT: global_atomic_swap_b64 v[0:1], v[0:1], v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_loadcnt 0x0
+; GFX12-NEXT: global_inv scope:SCOPE_SYS
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %result = atomicrmw xchg ptr addrspace(1) %ptr, i64 %val seq_cst, !amdgpu.no.remote.memory !0
+ ret i64 %result
+}
+
+define i32 @atomicrmw_xchg_i32_global_no_fine_grained_memory(ptr addrspace(1) %ptr, i32 %val) {
+; GFX12-LABEL: atomicrmw_xchg_i32_global_no_fine_grained_memory:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: global_wb scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_storecnt 0x0
+; GFX12-NEXT: global_atomic_swap_b32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_loadcnt 0x0
+; GFX12-NEXT: global_inv scope:SCOPE_SYS
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %result = atomicrmw xchg ptr addrspace(1) %ptr, i32 %val seq_cst, !amdgpu.no.fine.grained.memory !0
+ ret i32 %result
+}
+
+define i64 @atomicrmw_xchg_i64_global_no_fine_grained_memory(ptr addrspace(1) %ptr, i64 %val) {
+; GFX12-LABEL: atomicrmw_xchg_i64_global_no_fine_grained_memory:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: global_wb scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_storecnt 0x0
+; GFX12-NEXT: global_atomic_swap_b64 v[0:1], v[0:1], v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_loadcnt 0x0
+; GFX12-NEXT: global_inv scope:SCOPE_SYS
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %result = atomicrmw xchg ptr addrspace(1) %ptr, i64 %val seq_cst, !amdgpu.no.fine.grained.memory !0
+ ret i64 %result
+}
+
+!0 = !{}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw-xor.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw-xor.ll
new file mode 100644
index 0000000000000..4629da7429b9a
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw-xor.ll
@@ -0,0 +1,122 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12 %s
+
+; Test atomicrmw xor operations for different address spaces
+
+; =============================================================================
+; atomicrmw xor - local address space (addrspace 3)
+; (The global address space tests, which carry metadata to prevent expansion,
+; are in their own section further below.)
+; =============================================================================
+
+define i32 @atomicrmw_xor_i32_local(ptr addrspace(3) %ptr, i32 %val) {
+; GFX12-LABEL: atomicrmw_xor_i32_local:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: s_wait_storecnt 0x0
+; GFX12-NEXT: ds_xor_rtn_b32 v0, v0, v1
+; GFX12-NEXT: s_wait_dscnt 0x0
+; GFX12-NEXT: global_inv scope:SCOPE_SE
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %result = atomicrmw xor ptr addrspace(3) %ptr, i32 %val seq_cst
+ ret i32 %result
+}
+
+define i64 @atomicrmw_xor_i64_local(ptr addrspace(3) %ptr, i64 %val) {
+; GFX12-LABEL: atomicrmw_xor_i64_local:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: s_wait_storecnt 0x0
+; GFX12-NEXT: ds_xor_rtn_b64 v[0:1], v0, v[1:2]
+; GFX12-NEXT: s_wait_dscnt 0x0
+; GFX12-NEXT: global_inv scope:SCOPE_SE
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %result = atomicrmw xor ptr addrspace(3) %ptr, i64 %val seq_cst
+ ret i64 %result
+}
+
+; =============================================================================
+; atomicrmw xor with metadata - global address space (no expansion)
+; =============================================================================
+
+define i32 @atomicrmw_xor_i32_global_no_remote_memory(ptr addrspace(1) %ptr, i32 %val) {
+; GFX12-LABEL: atomicrmw_xor_i32_global_no_remote_memory:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: global_wb scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_storecnt 0x0
+; GFX12-NEXT: global_atomic_xor_b32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_loadcnt 0x0
+; GFX12-NEXT: global_inv scope:SCOPE_SYS
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %result = atomicrmw xor ptr addrspace(1) %ptr, i32 %val seq_cst, !amdgpu.no.remote.memory !0
+ ret i32 %result
+}
+
+define i64 @atomicrmw_xor_i64_global_no_remote_memory(ptr addrspace(1) %ptr, i64 %val) {
+; GFX12-LABEL: atomicrmw_xor_i64_global_no_remote_memory:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: global_wb scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_storecnt 0x0
+; GFX12-NEXT: global_atomic_xor_b64 v[0:1], v[0:1], v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_loadcnt 0x0
+; GFX12-NEXT: global_inv scope:SCOPE_SYS
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %result = atomicrmw xor ptr addrspace(1) %ptr, i64 %val seq_cst, !amdgpu.no.remote.memory !0
+ ret i64 %result
+}
+
+define i32 @atomicrmw_xor_i32_global_no_fine_grained_memory(ptr addrspace(1) %ptr, i32 %val) {
+; GFX12-LABEL: atomicrmw_xor_i32_global_no_fine_grained_memory:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: global_wb scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_storecnt 0x0
+; GFX12-NEXT: global_atomic_xor_b32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_loadcnt 0x0
+; GFX12-NEXT: global_inv scope:SCOPE_SYS
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %result = atomicrmw xor ptr addrspace(1) %ptr, i32 %val seq_cst, !amdgpu.no.fine.grained.memory !0
+ ret i32 %result
+}
+
+define i64 @atomicrmw_xor_i64_global_no_fine_grained_memory(ptr addrspace(1) %ptr, i64 %val) {
+; GFX12-LABEL: atomicrmw_xor_i64_global_no_fine_grained_memory:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: global_wb scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_storecnt 0x0
+; GFX12-NEXT: global_atomic_xor_b64 v[0:1], v[0:1], v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_loadcnt 0x0
+; GFX12-NEXT: global_inv scope:SCOPE_SYS
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %result = atomicrmw xor ptr addrspace(1) %ptr, i64 %val seq_cst, !amdgpu.no.fine.grained.memory !0
+ ret i64 %result
+}
+
+!0 = !{}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-add-sub.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-add-sub.mir
new file mode 100644
index 0000000000000..b36e1268bc10c
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-add-sub.mir
@@ -0,0 +1,461 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=amdgpu-regbankselect,amdgpu-regbanklegalize %s -verify-machineinstrs -o - | FileCheck %s
+
+# Test G_ATOMICRMW_ADD and G_ATOMICRMW_SUB register bank selection and legalization
+# for different address spaces (flat P0, global P1, local P3) and data sizes (S32, S64)
+
+---
+name: atomicrmw_add_flat_s32_ss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1, $sgpr2
+ ; CHECK-LABEL: name: atomicrmw_add_flat_s32_ss
+ ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p0) = COPY $sgpr0_sgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[ATOMICRMW_ADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_ADD [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (s32))
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ATOMICRMW_ADD]], [[ATOMICRMW_ADD]]
+ %0:_(p0) = COPY $sgpr0_sgpr1
+ %1:_(s32) = COPY $sgpr2
+ %2:_(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s32), addrspace 0)
+ %3:_(s32) = G_AND %2, %2
+...
+
+---
+name: atomicrmw_add_flat_s32_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2
+ ; CHECK-LABEL: name: atomicrmw_add_flat_s32_vv
+ ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[ATOMICRMW_ADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_ADD [[COPY]](p0), [[COPY1]] :: (load store seq_cst (s32))
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ATOMICRMW_ADD]], [[ATOMICRMW_ADD]]
+ %0:_(p0) = COPY $vgpr0_vgpr1
+ %1:_(s32) = COPY $vgpr2
+ %2:_(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s32), addrspace 0)
+ %3:_(s32) = G_AND %2, %2
+...
+
+---
+name: atomicrmw_add_flat_s64_ss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+ ; CHECK-LABEL: name: atomicrmw_add_flat_s64_ss
+ ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p0) = COPY $sgpr0_sgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64)
+ ; CHECK-NEXT: [[ATOMICRMW_ADD:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_ADD [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (s64))
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_ADD]](s64)
+ ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_ADD]](s64)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]]
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32)
+ %0:_(p0) = COPY $sgpr0_sgpr1
+ %1:_(s64) = COPY $sgpr2_sgpr3
+ %2:_(s64) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s64), addrspace 0)
+ %3:_(s64) = G_AND %2, %2
+...
+
+---
+name: atomicrmw_add_global_s32_ss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1, $sgpr2
+ ; CHECK-LABEL: name: atomicrmw_add_global_s32_ss
+ ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[ATOMICRMW_ADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_ADD [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (s32), addrspace 1)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ATOMICRMW_ADD]], [[ATOMICRMW_ADD]]
+ %0:_(p1) = COPY $sgpr0_sgpr1
+ %1:_(s32) = COPY $sgpr2
+ %2:_(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s32), addrspace 1)
+ %3:_(s32) = G_AND %2, %2
+...
+
+---
+name: atomicrmw_add_global_s32_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2
+ ; CHECK-LABEL: name: atomicrmw_add_global_s32_vv
+ ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[ATOMICRMW_ADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_ADD [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ATOMICRMW_ADD]], [[ATOMICRMW_ADD]]
+ %0:_(p1) = COPY $vgpr0_vgpr1
+ %1:_(s32) = COPY $vgpr2
+ %2:_(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s32), addrspace 1)
+ %3:_(s32) = G_AND %2, %2
+...
+
+---
+name: atomicrmw_add_global_s64_ss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+ ; CHECK-LABEL: name: atomicrmw_add_global_s64_ss
+ ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64)
+ ; CHECK-NEXT: [[ATOMICRMW_ADD:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_ADD [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (s64), addrspace 1)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_ADD]](s64)
+ ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_ADD]](s64)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]]
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32)
+ %0:_(p1) = COPY $sgpr0_sgpr1
+ %1:_(s64) = COPY $sgpr2_sgpr3
+ %2:_(s64) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s64), addrspace 1)
+ %3:_(s64) = G_AND %2, %2
+...
+
+---
+name: atomicrmw_add_global_s64_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+ ; CHECK-LABEL: name: atomicrmw_add_global_s64_vv
+ ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
+ ; CHECK-NEXT: [[ATOMICRMW_ADD:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_ADD [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s64), addrspace 1)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_ADD]](s64)
+ ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_ADD]](s64)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]]
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32)
+ %0:_(p1) = COPY $vgpr0_vgpr1
+ %1:_(s64) = COPY $vgpr2_vgpr3
+ %2:_(s64) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s64), addrspace 1)
+ %3:_(s64) = G_AND %2, %2
+...
+
+---
+name: atomicrmw_add_local_s32_ss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1
+ ; CHECK-LABEL: name: atomicrmw_add_local_s32_ss
+ ; CHECK: liveins: $sgpr0, $sgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[ATOMICRMW_ADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_ADD [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (s32), addrspace 3)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ATOMICRMW_ADD]], [[ATOMICRMW_ADD]]
+ %0:_(p3) = COPY $sgpr0
+ %1:_(s32) = COPY $sgpr1
+ %2:_(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s32), addrspace 3)
+ %3:_(s32) = G_AND %2, %2
+...
+
+---
+name: atomicrmw_add_local_s32_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ; CHECK-LABEL: name: atomicrmw_add_local_s32_vv
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[ATOMICRMW_ADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_ADD [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ATOMICRMW_ADD]], [[ATOMICRMW_ADD]]
+ %0:_(p3) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s32), addrspace 3)
+ %3:_(s32) = G_AND %2, %2
+...
+
+---
+name: atomicrmw_add_local_s64_ss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr2_sgpr3
+ ; CHECK-LABEL: name: atomicrmw_add_local_s64_ss
+ ; CHECK: liveins: $sgpr0, $sgpr2_sgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64)
+ ; CHECK-NEXT: [[ATOMICRMW_ADD:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_ADD [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (s64), addrspace 3)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_ADD]](s64)
+ ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_ADD]](s64)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]]
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32)
+ %0:_(p3) = COPY $sgpr0
+ %1:_(s64) = COPY $sgpr2_sgpr3
+ %2:_(s64) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s64), addrspace 3)
+ %3:_(s64) = G_AND %2, %2
+...
+
+---
+name: atomicrmw_sub_flat_s32_ss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1, $sgpr2
+ ; CHECK-LABEL: name: atomicrmw_sub_flat_s32_ss
+ ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p0) = COPY $sgpr0_sgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[ATOMICRMW_SUB:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_SUB [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (s32))
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ATOMICRMW_SUB]], [[ATOMICRMW_SUB]]
+ %0:_(p0) = COPY $sgpr0_sgpr1
+ %1:_(s32) = COPY $sgpr2
+ %2:_(s32) = G_ATOMICRMW_SUB %0, %1 :: (load store seq_cst (s32), addrspace 0)
+ %3:_(s32) = G_AND %2, %2
+...
+
+---
+name: atomicrmw_sub_flat_s32_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2
+ ; CHECK-LABEL: name: atomicrmw_sub_flat_s32_vv
+ ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[ATOMICRMW_SUB:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_SUB [[COPY]](p0), [[COPY1]] :: (load store seq_cst (s32))
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ATOMICRMW_SUB]], [[ATOMICRMW_SUB]]
+ %0:_(p0) = COPY $vgpr0_vgpr1
+ %1:_(s32) = COPY $vgpr2
+ %2:_(s32) = G_ATOMICRMW_SUB %0, %1 :: (load store seq_cst (s32), addrspace 0)
+ %3:_(s32) = G_AND %2, %2
+...
+
+---
+name: atomicrmw_sub_flat_s64_ss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+ ; CHECK-LABEL: name: atomicrmw_sub_flat_s64_ss
+ ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p0) = COPY $sgpr0_sgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64)
+ ; CHECK-NEXT: [[ATOMICRMW_SUB:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_SUB [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (s64))
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_SUB]](s64)
+ ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_SUB]](s64)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]]
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32)
+ %0:_(p0) = COPY $sgpr0_sgpr1
+ %1:_(s64) = COPY $sgpr2_sgpr3
+ %2:_(s64) = G_ATOMICRMW_SUB %0, %1 :: (load store seq_cst (s64), addrspace 0)
+ %3:_(s64) = G_AND %2, %2
+...
+
+---
+name: atomicrmw_sub_global_s32_ss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1, $sgpr2
+ ; CHECK-LABEL: name: atomicrmw_sub_global_s32_ss
+ ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[ATOMICRMW_SUB:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_SUB [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (s32), addrspace 1)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ATOMICRMW_SUB]], [[ATOMICRMW_SUB]]
+ %0:_(p1) = COPY $sgpr0_sgpr1
+ %1:_(s32) = COPY $sgpr2
+ %2:_(s32) = G_ATOMICRMW_SUB %0, %1 :: (load store seq_cst (s32), addrspace 1)
+ %3:_(s32) = G_AND %2, %2
+...
+
+---
+name: atomicrmw_sub_global_s32_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2
+ ; CHECK-LABEL: name: atomicrmw_sub_global_s32_vv
+ ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[ATOMICRMW_SUB:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_SUB [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ATOMICRMW_SUB]], [[ATOMICRMW_SUB]]
+ %0:_(p1) = COPY $vgpr0_vgpr1
+ %1:_(s32) = COPY $vgpr2
+ %2:_(s32) = G_ATOMICRMW_SUB %0, %1 :: (load store seq_cst (s32), addrspace 1)
+ %3:_(s32) = G_AND %2, %2
+...
+
+---
+name: atomicrmw_sub_global_s64_ss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+ ; CHECK-LABEL: name: atomicrmw_sub_global_s64_ss
+ ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64)
+ ; CHECK-NEXT: [[ATOMICRMW_SUB:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_SUB [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (s64), addrspace 1)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_SUB]](s64)
+ ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_SUB]](s64)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]]
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32)
+ %0:_(p1) = COPY $sgpr0_sgpr1
+ %1:_(s64) = COPY $sgpr2_sgpr3
+ %2:_(s64) = G_ATOMICRMW_SUB %0, %1 :: (load store seq_cst (s64), addrspace 1)
+ %3:_(s64) = G_AND %2, %2
+...
+
+---
+name: atomicrmw_sub_global_s64_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+ ; CHECK-LABEL: name: atomicrmw_sub_global_s64_vv
+ ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
+ ; CHECK-NEXT: [[ATOMICRMW_SUB:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_SUB [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s64), addrspace 1)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_SUB]](s64)
+ ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_SUB]](s64)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]]
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32)
+ %0:_(p1) = COPY $vgpr0_vgpr1
+ %1:_(s64) = COPY $vgpr2_vgpr3
+ %2:_(s64) = G_ATOMICRMW_SUB %0, %1 :: (load store seq_cst (s64), addrspace 1)
+ %3:_(s64) = G_AND %2, %2
+...
+
+---
+name: atomicrmw_sub_local_s32_ss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1
+ ; CHECK-LABEL: name: atomicrmw_sub_local_s32_ss
+ ; CHECK: liveins: $sgpr0, $sgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[ATOMICRMW_SUB:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_SUB [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (s32), addrspace 3)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ATOMICRMW_SUB]], [[ATOMICRMW_SUB]]
+ %0:_(p3) = COPY $sgpr0
+ %1:_(s32) = COPY $sgpr1
+ %2:_(s32) = G_ATOMICRMW_SUB %0, %1 :: (load store seq_cst (s32), addrspace 3)
+ %3:_(s32) = G_AND %2, %2
+...
+
+---
+name: atomicrmw_sub_local_s32_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ; CHECK-LABEL: name: atomicrmw_sub_local_s32_vv
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[ATOMICRMW_SUB:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_SUB [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ATOMICRMW_SUB]], [[ATOMICRMW_SUB]]
+ %0:_(p3) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32) = G_ATOMICRMW_SUB %0, %1 :: (load store seq_cst (s32), addrspace 3)
+ %3:_(s32) = G_AND %2, %2
+...
+
+---
+name: atomicrmw_sub_local_s64_ss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr2_sgpr3
+ ; CHECK-LABEL: name: atomicrmw_sub_local_s64_ss
+ ; CHECK: liveins: $sgpr0, $sgpr2_sgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64)
+ ; CHECK-NEXT: [[ATOMICRMW_SUB:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_SUB [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (s64), addrspace 3)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_SUB]](s64)
+ ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_SUB]](s64)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]]
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32)
+ %0:_(p3) = COPY $sgpr0
+ %1:_(s64) = COPY $sgpr2_sgpr3
+ %2:_(s64) = G_ATOMICRMW_SUB %0, %1 :: (load store seq_cst (s64), addrspace 3)
+ %3:_(s64) = G_AND %2, %2
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-add.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-add.mir
index bcd676f31c90a..97482792553c4 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-add.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-add.mir
@@ -1,6 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=amdgpu-regbankselect,amdgpu-regbanklegalize %s -verify-machineinstrs -o - | FileCheck %s
---
name: atomicrmw_add_global_i32_ss
@@ -61,3 +60,63 @@ body: |
%1:_(s32) = COPY $sgpr1
%2:_(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s32), addrspace 3)
...
+
+---
+name: atomicrmw_add_global_i64_ss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+ ; CHECK-LABEL: name: atomicrmw_add_global_i64_ss
+ ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64)
+ ; CHECK-NEXT: [[ATOMICRMW_ADD:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_ADD [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (s64), addrspace 1)
+ %0:_(p1) = COPY $sgpr0_sgpr1
+ %1:_(s64) = COPY $sgpr2_sgpr3
+ %2:_(s64) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s64), addrspace 1)
+...
+
+---
+name: atomicrmw_add_flat_i64_ss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+ ; CHECK-LABEL: name: atomicrmw_add_flat_i64_ss
+ ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p0) = COPY $sgpr0_sgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64)
+ ; CHECK-NEXT: [[ATOMICRMW_ADD:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_ADD [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (s64))
+ %0:_(p0) = COPY $sgpr0_sgpr1
+ %1:_(s64) = COPY $sgpr2_sgpr3
+ %2:_(s64) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s64), addrspace 0)
+...
+
+---
+name: atomicrmw_add_local_i64_ss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr2_sgpr3
+ ; CHECK-LABEL: name: atomicrmw_add_local_i64_ss
+ ; CHECK: liveins: $sgpr0, $sgpr2_sgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64)
+ ; CHECK-NEXT: [[ATOMICRMW_ADD:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_ADD [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (s64), addrspace 3)
+ %0:_(p3) = COPY $sgpr0
+ %1:_(s64) = COPY $sgpr2_sgpr3
+ %2:_(s64) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s64), addrspace 3)
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-and.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-and.mir
index e4fe9c5c7d9f3..878cd0783e5e3 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-and.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-and.mir
@@ -1,6 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1201 -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" %s -verify-machineinstrs -o - | FileCheck %s
---
name: atomicrmw_and_global_i32_ss
@@ -22,6 +21,62 @@ body: |
%2:_(s32) = G_ATOMICRMW_AND %0, %1 :: (load store seq_cst (s32), addrspace 1)
...
+---
+name: atomicrmw_and_global_i32_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2
+ ; CHECK-LABEL: name: atomicrmw_and_global_i32_vv
+ ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[ATOMICRMW_AND:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_AND [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1)
+ %0:_(p1) = COPY $vgpr0_vgpr1
+ %1:_(s32) = COPY $vgpr2
+ %2:_(s32) = G_ATOMICRMW_AND %0, %1 :: (load store seq_cst (s32), addrspace 1)
+...
+
+---
+name: atomicrmw_and_global_i64_ss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+ ; CHECK-LABEL: name: atomicrmw_and_global_i64_ss
+ ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64)
+ ; CHECK-NEXT: [[ATOMICRMW_AND:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_AND [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (s64), addrspace 1)
+ %0:_(p1) = COPY $sgpr0_sgpr1
+ %1:_(s64) = COPY $sgpr2_sgpr3
+ %2:_(s64) = G_ATOMICRMW_AND %0, %1 :: (load store seq_cst (s64), addrspace 1)
+...
+
+---
+name: atomicrmw_and_global_i64_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+ ; CHECK-LABEL: name: atomicrmw_and_global_i64_vv
+ ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
+ ; CHECK-NEXT: [[ATOMICRMW_AND:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_AND [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s64), addrspace 1)
+ %0:_(p1) = COPY $vgpr0_vgpr1
+ %1:_(s64) = COPY $vgpr2_vgpr3
+ %2:_(s64) = G_ATOMICRMW_AND %0, %1 :: (load store seq_cst (s64), addrspace 1)
+...
+
---
name: atomicrmw_and_flat_i32_ss
legalized: true
@@ -42,6 +97,62 @@ body: |
%2:_(s32) = G_ATOMICRMW_AND %0, %1 :: (load store seq_cst (s32), addrspace 0)
...
+---
+name: atomicrmw_and_flat_i32_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2
+ ; CHECK-LABEL: name: atomicrmw_and_flat_i32_vv
+ ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[ATOMICRMW_AND:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_AND [[COPY]](p0), [[COPY1]] :: (load store seq_cst (s32))
+ %0:_(p0) = COPY $vgpr0_vgpr1
+ %1:_(s32) = COPY $vgpr2
+ %2:_(s32) = G_ATOMICRMW_AND %0, %1 :: (load store seq_cst (s32), addrspace 0)
+...
+
+---
+name: atomicrmw_and_flat_i64_ss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+ ; CHECK-LABEL: name: atomicrmw_and_flat_i64_ss
+ ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p0) = COPY $sgpr0_sgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64)
+ ; CHECK-NEXT: [[ATOMICRMW_AND:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_AND [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (s64))
+ %0:_(p0) = COPY $sgpr0_sgpr1
+ %1:_(s64) = COPY $sgpr2_sgpr3
+ %2:_(s64) = G_ATOMICRMW_AND %0, %1 :: (load store seq_cst (s64), addrspace 0)
+...
+
+---
+name: atomicrmw_and_flat_i64_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+ ; CHECK-LABEL: name: atomicrmw_and_flat_i64_vv
+ ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
+ ; CHECK-NEXT: [[ATOMICRMW_AND:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_AND [[COPY]](p0), [[COPY1]] :: (load store seq_cst (s64))
+ %0:_(p0) = COPY $vgpr0_vgpr1
+ %1:_(s64) = COPY $vgpr2_vgpr3
+ %2:_(s64) = G_ATOMICRMW_AND %0, %1 :: (load store seq_cst (s64), addrspace 0)
+...
+
---
name: atomicrmw_and_local_i32_ss
legalized: true
@@ -61,3 +172,59 @@ body: |
%1:_(s32) = COPY $sgpr1
%2:_(s32) = G_ATOMICRMW_AND %0, %1 :: (load store seq_cst (s32), addrspace 3)
...
+
+---
+name: atomicrmw_and_local_i32_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ; CHECK-LABEL: name: atomicrmw_and_local_i32_vv
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[ATOMICRMW_AND:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_AND [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3)
+ %0:_(p3) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32) = G_ATOMICRMW_AND %0, %1 :: (load store seq_cst (s32), addrspace 3)
+...
+
+---
+name: atomicrmw_and_local_i64_ss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr2_sgpr3
+ ; CHECK-LABEL: name: atomicrmw_and_local_i64_ss
+ ; CHECK: liveins: $sgpr0, $sgpr2_sgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64)
+ ; CHECK-NEXT: [[ATOMICRMW_AND:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_AND [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (s64), addrspace 3)
+ %0:_(p3) = COPY $sgpr0
+ %1:_(s64) = COPY $sgpr2_sgpr3
+ %2:_(s64) = G_ATOMICRMW_AND %0, %1 :: (load store seq_cst (s64), addrspace 3)
+...
+
+---
+name: atomicrmw_and_local_i64_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr2_vgpr3
+ ; CHECK-LABEL: name: atomicrmw_and_local_i64_vv
+ ; CHECK: liveins: $vgpr0, $vgpr2_vgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
+ ; CHECK-NEXT: [[ATOMICRMW_AND:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_AND [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s64), addrspace 3)
+ %0:_(p3) = COPY $vgpr0
+ %1:_(s64) = COPY $vgpr2_vgpr3
+ %2:_(s64) = G_ATOMICRMW_AND %0, %1 :: (load store seq_cst (s64), addrspace 3)
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-or.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-or.mir
index 3a16d72cb8ebd..5aa0fdeae9499 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-or.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-or.mir
@@ -1,6 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1201 -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" %s -verify-machineinstrs -o - | FileCheck %s
---
name: atomicrmw_or_global_i32_ss
@@ -22,6 +21,62 @@ body: |
%2:_(s32) = G_ATOMICRMW_OR %0, %1 :: (load store seq_cst (s32), addrspace 1)
...
+---
+name: atomicrmw_or_global_i32_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2
+ ; CHECK-LABEL: name: atomicrmw_or_global_i32_vv
+ ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[ATOMICRMW_OR:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_OR [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1)
+ %0:_(p1) = COPY $vgpr0_vgpr1
+ %1:_(s32) = COPY $vgpr2
+ %2:_(s32) = G_ATOMICRMW_OR %0, %1 :: (load store seq_cst (s32), addrspace 1)
+...
+
+---
+name: atomicrmw_or_global_i64_ss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+ ; CHECK-LABEL: name: atomicrmw_or_global_i64_ss
+ ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64)
+ ; CHECK-NEXT: [[ATOMICRMW_OR:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_OR [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (s64), addrspace 1)
+ %0:_(p1) = COPY $sgpr0_sgpr1
+ %1:_(s64) = COPY $sgpr2_sgpr3
+ %2:_(s64) = G_ATOMICRMW_OR %0, %1 :: (load store seq_cst (s64), addrspace 1)
+...
+
+---
+name: atomicrmw_or_global_i64_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+ ; CHECK-LABEL: name: atomicrmw_or_global_i64_vv
+ ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
+ ; CHECK-NEXT: [[ATOMICRMW_OR:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_OR [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s64), addrspace 1)
+ %0:_(p1) = COPY $vgpr0_vgpr1
+ %1:_(s64) = COPY $vgpr2_vgpr3
+ %2:_(s64) = G_ATOMICRMW_OR %0, %1 :: (load store seq_cst (s64), addrspace 1)
+...
+
---
name: atomicrmw_or_flat_i32_ss
legalized: true
@@ -42,6 +97,62 @@ body: |
%2:_(s32) = G_ATOMICRMW_OR %0, %1 :: (load store seq_cst (s32), addrspace 0)
...
+---
+name: atomicrmw_or_flat_i32_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2
+ ; CHECK-LABEL: name: atomicrmw_or_flat_i32_vv
+ ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[ATOMICRMW_OR:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_OR [[COPY]](p0), [[COPY1]] :: (load store seq_cst (s32))
+ %0:_(p0) = COPY $vgpr0_vgpr1
+ %1:_(s32) = COPY $vgpr2
+ %2:_(s32) = G_ATOMICRMW_OR %0, %1 :: (load store seq_cst (s32), addrspace 0)
+...
+
+---
+name: atomicrmw_or_flat_i64_ss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+ ; CHECK-LABEL: name: atomicrmw_or_flat_i64_ss
+ ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p0) = COPY $sgpr0_sgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64)
+ ; CHECK-NEXT: [[ATOMICRMW_OR:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_OR [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (s64))
+ %0:_(p0) = COPY $sgpr0_sgpr1
+ %1:_(s64) = COPY $sgpr2_sgpr3
+ %2:_(s64) = G_ATOMICRMW_OR %0, %1 :: (load store seq_cst (s64), addrspace 0)
+...
+
+---
+name: atomicrmw_or_flat_i64_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+ ; CHECK-LABEL: name: atomicrmw_or_flat_i64_vv
+ ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
+ ; CHECK-NEXT: [[ATOMICRMW_OR:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_OR [[COPY]](p0), [[COPY1]] :: (load store seq_cst (s64))
+ %0:_(p0) = COPY $vgpr0_vgpr1
+ %1:_(s64) = COPY $vgpr2_vgpr3
+ %2:_(s64) = G_ATOMICRMW_OR %0, %1 :: (load store seq_cst (s64), addrspace 0)
+...
+
---
name: atomicrmw_or_local_i32_ss
legalized: true
@@ -61,3 +172,59 @@ body: |
%1:_(s32) = COPY $sgpr1
%2:_(s32) = G_ATOMICRMW_OR %0, %1 :: (load store seq_cst (s32), addrspace 3)
...
+
+---
+name: atomicrmw_or_local_i32_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ; CHECK-LABEL: name: atomicrmw_or_local_i32_vv
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[ATOMICRMW_OR:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_OR [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3)
+ %0:_(p3) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32) = G_ATOMICRMW_OR %0, %1 :: (load store seq_cst (s32), addrspace 3)
+...
+
+---
+name: atomicrmw_or_local_i64_ss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr2_sgpr3
+ ; CHECK-LABEL: name: atomicrmw_or_local_i64_ss
+ ; CHECK: liveins: $sgpr0, $sgpr2_sgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64)
+ ; CHECK-NEXT: [[ATOMICRMW_OR:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_OR [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (s64), addrspace 3)
+ %0:_(p3) = COPY $sgpr0
+ %1:_(s64) = COPY $sgpr2_sgpr3
+ %2:_(s64) = G_ATOMICRMW_OR %0, %1 :: (load store seq_cst (s64), addrspace 3)
+...
+
+---
+name: atomicrmw_or_local_i64_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr2_vgpr3
+ ; CHECK-LABEL: name: atomicrmw_or_local_i64_vv
+ ; CHECK: liveins: $vgpr0, $vgpr2_vgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
+ ; CHECK-NEXT: [[ATOMICRMW_OR:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_OR [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s64), addrspace 3)
+ %0:_(p3) = COPY $vgpr0
+ %1:_(s64) = COPY $vgpr2_vgpr3
+ %2:_(s64) = G_ATOMICRMW_OR %0, %1 :: (load store seq_cst (s64), addrspace 3)
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-sub.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-sub.mir
index e7b9c8efff6ce..c87fc39055d9b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-sub.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-sub.mir
@@ -1,6 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1201 -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" %s -verify-machineinstrs -o - | FileCheck %s
---
name: atomicrmw_sub_global_i32_ss
@@ -22,6 +21,62 @@ body: |
%2:_(s32) = G_ATOMICRMW_SUB %0, %1 :: (load store seq_cst (s32), addrspace 1)
...
+---
+name: atomicrmw_sub_global_i32_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2
+ ; CHECK-LABEL: name: atomicrmw_sub_global_i32_vv
+ ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[ATOMICRMW_SUB:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_SUB [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1)
+ %0:_(p1) = COPY $vgpr0_vgpr1
+ %1:_(s32) = COPY $vgpr2
+ %2:_(s32) = G_ATOMICRMW_SUB %0, %1 :: (load store seq_cst (s32), addrspace 1)
+...
+
+---
+name: atomicrmw_sub_global_i64_ss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+ ; CHECK-LABEL: name: atomicrmw_sub_global_i64_ss
+ ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64)
+ ; CHECK-NEXT: [[ATOMICRMW_SUB:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_SUB [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (s64), addrspace 1)
+ %0:_(p1) = COPY $sgpr0_sgpr1
+ %1:_(s64) = COPY $sgpr2_sgpr3
+ %2:_(s64) = G_ATOMICRMW_SUB %0, %1 :: (load store seq_cst (s64), addrspace 1)
+...
+
+---
+name: atomicrmw_sub_global_i64_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+ ; CHECK-LABEL: name: atomicrmw_sub_global_i64_vv
+ ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
+ ; CHECK-NEXT: [[ATOMICRMW_SUB:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_SUB [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s64), addrspace 1)
+ %0:_(p1) = COPY $vgpr0_vgpr1
+ %1:_(s64) = COPY $vgpr2_vgpr3
+ %2:_(s64) = G_ATOMICRMW_SUB %0, %1 :: (load store seq_cst (s64), addrspace 1)
+...
+
---
name: atomicrmw_sub_flat_i32_ss
legalized: true
@@ -42,6 +97,62 @@ body: |
%2:_(s32) = G_ATOMICRMW_SUB %0, %1 :: (load store seq_cst (s32), addrspace 0)
...
+---
+name: atomicrmw_sub_flat_i32_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2
+ ; CHECK-LABEL: name: atomicrmw_sub_flat_i32_vv
+ ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[ATOMICRMW_SUB:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_SUB [[COPY]](p0), [[COPY1]] :: (load store seq_cst (s32))
+ %0:_(p0) = COPY $vgpr0_vgpr1
+ %1:_(s32) = COPY $vgpr2
+ %2:_(s32) = G_ATOMICRMW_SUB %0, %1 :: (load store seq_cst (s32), addrspace 0)
+...
+
+---
+name: atomicrmw_sub_flat_i64_ss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+ ; CHECK-LABEL: name: atomicrmw_sub_flat_i64_ss
+ ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p0) = COPY $sgpr0_sgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64)
+ ; CHECK-NEXT: [[ATOMICRMW_SUB:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_SUB [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (s64))
+ %0:_(p0) = COPY $sgpr0_sgpr1
+ %1:_(s64) = COPY $sgpr2_sgpr3
+ %2:_(s64) = G_ATOMICRMW_SUB %0, %1 :: (load store seq_cst (s64), addrspace 0)
+...
+
+---
+name: atomicrmw_sub_flat_i64_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+ ; CHECK-LABEL: name: atomicrmw_sub_flat_i64_vv
+ ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
+ ; CHECK-NEXT: [[ATOMICRMW_SUB:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_SUB [[COPY]](p0), [[COPY1]] :: (load store seq_cst (s64))
+ %0:_(p0) = COPY $vgpr0_vgpr1
+ %1:_(s64) = COPY $vgpr2_vgpr3
+ %2:_(s64) = G_ATOMICRMW_SUB %0, %1 :: (load store seq_cst (s64), addrspace 0)
+...
+
---
name: atomicrmw_sub_local_i32_ss
legalized: true
@@ -61,3 +172,59 @@ body: |
%1:_(s32) = COPY $sgpr1
%2:_(s32) = G_ATOMICRMW_SUB %0, %1 :: (load store seq_cst (s32), addrspace 3)
...
+
+---
+name: atomicrmw_sub_local_i32_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ; CHECK-LABEL: name: atomicrmw_sub_local_i32_vv
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[ATOMICRMW_SUB:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_SUB [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3)
+ %0:_(p3) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32) = G_ATOMICRMW_SUB %0, %1 :: (load store seq_cst (s32), addrspace 3)
+...
+
+---
+name: atomicrmw_sub_local_i64_ss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr2_sgpr3
+ ; CHECK-LABEL: name: atomicrmw_sub_local_i64_ss
+ ; CHECK: liveins: $sgpr0, $sgpr2_sgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64)
+ ; CHECK-NEXT: [[ATOMICRMW_SUB:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_SUB [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (s64), addrspace 3)
+ %0:_(p3) = COPY $sgpr0
+ %1:_(s64) = COPY $sgpr2_sgpr3
+ %2:_(s64) = G_ATOMICRMW_SUB %0, %1 :: (load store seq_cst (s64), addrspace 3)
+...
+
+---
+name: atomicrmw_sub_local_i64_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr2_vgpr3
+ ; CHECK-LABEL: name: atomicrmw_sub_local_i64_vv
+ ; CHECK: liveins: $vgpr0, $vgpr2_vgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
+ ; CHECK-NEXT: [[ATOMICRMW_SUB:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_SUB [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s64), addrspace 3)
+ %0:_(p3) = COPY $vgpr0
+ %1:_(s64) = COPY $vgpr2_vgpr3
+ %2:_(s64) = G_ATOMICRMW_SUB %0, %1 :: (load store seq_cst (s64), addrspace 3)
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-xchg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-xchg.mir
index db01a21d061d3..643a69daf8789 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-xchg.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-xchg.mir
@@ -1,6 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" %s -verify-machineinstrs -o - | FileCheck %s
---
name: atomicrmw_xchg_global_i32_ss
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-xor.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-xor.mir
index 903d6f54ba46d..9e5c088fb1147 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-xor.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-xor.mir
@@ -1,6 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1201 -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" %s -verify-machineinstrs -o - | FileCheck %s
---
name: atomicrmw_xor_global_i32_ss
@@ -22,6 +21,62 @@ body: |
%2:_(s32) = G_ATOMICRMW_XOR %0, %1 :: (load store seq_cst (s32), addrspace 1)
...
+---
+name: atomicrmw_xor_global_i32_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2
+ ; CHECK-LABEL: name: atomicrmw_xor_global_i32_vv
+ ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[ATOMICRMW_XOR:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_XOR [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1)
+ %0:_(p1) = COPY $vgpr0_vgpr1
+ %1:_(s32) = COPY $vgpr2
+ %2:_(s32) = G_ATOMICRMW_XOR %0, %1 :: (load store seq_cst (s32), addrspace 1)
+...
+
+---
+name: atomicrmw_xor_global_i64_ss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+ ; CHECK-LABEL: name: atomicrmw_xor_global_i64_ss
+ ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64)
+ ; CHECK-NEXT: [[ATOMICRMW_XOR:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_XOR [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (s64), addrspace 1)
+ %0:_(p1) = COPY $sgpr0_sgpr1
+ %1:_(s64) = COPY $sgpr2_sgpr3
+ %2:_(s64) = G_ATOMICRMW_XOR %0, %1 :: (load store seq_cst (s64), addrspace 1)
+...
+
+---
+name: atomicrmw_xor_global_i64_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+ ; CHECK-LABEL: name: atomicrmw_xor_global_i64_vv
+ ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
+ ; CHECK-NEXT: [[ATOMICRMW_XOR:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_XOR [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s64), addrspace 1)
+ %0:_(p1) = COPY $vgpr0_vgpr1
+ %1:_(s64) = COPY $vgpr2_vgpr3
+ %2:_(s64) = G_ATOMICRMW_XOR %0, %1 :: (load store seq_cst (s64), addrspace 1)
+...
+
---
name: atomicrmw_xor_flat_i32_ss
legalized: true
@@ -42,6 +97,62 @@ body: |
%2:_(s32) = G_ATOMICRMW_XOR %0, %1 :: (load store seq_cst (s32), addrspace 0)
...
+---
+name: atomicrmw_xor_flat_i32_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2
+ ; CHECK-LABEL: name: atomicrmw_xor_flat_i32_vv
+ ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[ATOMICRMW_XOR:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_XOR [[COPY]](p0), [[COPY1]] :: (load store seq_cst (s32))
+ %0:_(p0) = COPY $vgpr0_vgpr1
+ %1:_(s32) = COPY $vgpr2
+ %2:_(s32) = G_ATOMICRMW_XOR %0, %1 :: (load store seq_cst (s32), addrspace 0)
+...
+
+---
+name: atomicrmw_xor_flat_i64_ss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+ ; CHECK-LABEL: name: atomicrmw_xor_flat_i64_ss
+ ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p0) = COPY $sgpr0_sgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64)
+ ; CHECK-NEXT: [[ATOMICRMW_XOR:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_XOR [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (s64))
+ %0:_(p0) = COPY $sgpr0_sgpr1
+ %1:_(s64) = COPY $sgpr2_sgpr3
+ %2:_(s64) = G_ATOMICRMW_XOR %0, %1 :: (load store seq_cst (s64), addrspace 0)
+...
+
+---
+name: atomicrmw_xor_flat_i64_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+ ; CHECK-LABEL: name: atomicrmw_xor_flat_i64_vv
+ ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
+ ; CHECK-NEXT: [[ATOMICRMW_XOR:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_XOR [[COPY]](p0), [[COPY1]] :: (load store seq_cst (s64))
+ %0:_(p0) = COPY $vgpr0_vgpr1
+ %1:_(s64) = COPY $vgpr2_vgpr3
+ %2:_(s64) = G_ATOMICRMW_XOR %0, %1 :: (load store seq_cst (s64), addrspace 0)
+...
+
---
name: atomicrmw_xor_local_i32_ss
legalized: true
@@ -61,3 +172,59 @@ body: |
%1:_(s32) = COPY $sgpr1
%2:_(s32) = G_ATOMICRMW_XOR %0, %1 :: (load store seq_cst (s32), addrspace 3)
...
+
+---
+name: atomicrmw_xor_local_i32_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ; CHECK-LABEL: name: atomicrmw_xor_local_i32_vv
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[ATOMICRMW_XOR:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_XOR [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3)
+ %0:_(p3) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32) = G_ATOMICRMW_XOR %0, %1 :: (load store seq_cst (s32), addrspace 3)
+...
+
+---
+name: atomicrmw_xor_local_i64_ss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr2_sgpr3
+ ; CHECK-LABEL: name: atomicrmw_xor_local_i64_ss
+ ; CHECK: liveins: $sgpr0, $sgpr2_sgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64)
+ ; CHECK-NEXT: [[ATOMICRMW_XOR:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_XOR [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (s64), addrspace 3)
+ %0:_(p3) = COPY $sgpr0
+ %1:_(s64) = COPY $sgpr2_sgpr3
+ %2:_(s64) = G_ATOMICRMW_XOR %0, %1 :: (load store seq_cst (s64), addrspace 3)
+...
+
+---
+name: atomicrmw_xor_local_i64_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr2_vgpr3
+ ; CHECK-LABEL: name: atomicrmw_xor_local_i64_vv
+ ; CHECK: liveins: $vgpr0, $vgpr2_vgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
+ ; CHECK-NEXT: [[ATOMICRMW_XOR:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_XOR [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s64), addrspace 3)
+ %0:_(p3) = COPY $vgpr0
+ %1:_(s64) = COPY $vgpr2_vgpr3
+ %2:_(s64) = G_ATOMICRMW_XOR %0, %1 :: (load store seq_cst (s64), addrspace 3)
+...
More information about the llvm-commits
mailing list