[llvm] [AMDGPU] Add legalization rules for G_ATOMICRMW_FADD (PR #175257)
Anshil Gandhi via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 9 06:25:49 PST 2026
https://github.com/gandhi56 updated https://github.com/llvm/llvm-project/pull/175257
From 2e0bfc430f5ce8c0a26e86ea91693f3d0584dea7 Mon Sep 17 00:00:00 2001
From: Anshil Gandhi <Anshil.Gandhi at amd.com>
Date: Fri, 9 Jan 2026 01:41:13 -0600
Subject: [PATCH] [AMDGPU] Add regbankselect rules for G_ATOMICRMW_FADD
---
.../AMDGPU/AMDGPURegBankLegalizeRules.cpp | 29 +-
.../AMDGPU/GlobalISel/flat-atomic-fadd.f32.ll | 5 +-
.../AMDGPU/GlobalISel/flat-atomic-fadd.f64.ll | 3 +-
.../GlobalISel/flat-atomic-fadd.v2f16.ll | 2 +-
.../AMDGPU/GlobalISel/fp-atomics-gfx942.ll | 178 +++++++++---
.../global-atomic-fadd.v2f16-no-rtn.ll | 5 +-
.../global-atomic-fadd.v2f16-rtn.ll | 3 +-
.../regbankselect-atomicrmw-fadd.mir | 263 +++++++++++++++++-
.../regbankselect-atomicrmw-fadd.v2f16.mir | 66 +++++
9 files changed, 500 insertions(+), 54 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-fadd.v2f16.mir
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
index 1c2346215b38c..a05dba224f817 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
@@ -76,6 +76,8 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
return isAnyPtr(MRI.getType(Reg), 64);
case Ptr128:
return isAnyPtr(MRI.getType(Reg), 128);
+ case V2S16:
+ return MRI.getType(Reg) == LLT::fixed_vector(2, 16);
case V2S32:
return MRI.getType(Reg) == LLT::fixed_vector(2, 32);
case V3S32:
@@ -758,16 +760,26 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
.Uni(S64, {{Sgpr64}, {Sgpr64, Imm}})
.Div(S64, {{Vgpr64}, {Vgpr64, Imm}});
- // Atomic read-modify-write operations: result and value are always VGPR,
- // pointer varies by address space.
addRulesForGOpcs({G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, G_ATOMICRMW_XCHG,
G_ATOMICRMW_AND, G_ATOMICRMW_OR, G_ATOMICRMW_XOR})
- .Any({{S32, P0}, {{Vgpr32}, {VgprP0, Vgpr32}}})
- .Any({{S64, P0}, {{Vgpr64}, {VgprP0, Vgpr64}}})
- .Any({{S32, P1}, {{Vgpr32}, {VgprP1, Vgpr32}}})
- .Any({{S64, P1}, {{Vgpr64}, {VgprP1, Vgpr64}}})
- .Any({{S32, P3}, {{Vgpr32}, {VgprP3, Vgpr32}}})
- .Any({{S64, P3}, {{Vgpr64}, {VgprP3, Vgpr64}}});
+ .Any({{DivS32, Ptr32, S32}, {{Vgpr32}, {VgprPtr32, Vgpr32}}})
+ .Any({{DivS32, Ptr64, S32}, {{Vgpr32}, {VgprPtr64, Vgpr32}}})
+ .Any({{DivS64, Ptr32, S64}, {{Vgpr64}, {VgprPtr32, Vgpr64}}})
+ .Any({{DivS64, Ptr64, S64}, {{Vgpr64}, {VgprPtr64, Vgpr64}}});
+
+ bool HasAtomicFlatPkAdd16Insts = ST->hasAtomicFlatPkAdd16Insts();
+ bool HasAtomicBufferGlobalPkAddF16Insts =
+     ST->hasAtomicBufferGlobalPkAddF16NoRtnInsts() ||
+     ST->hasAtomicBufferGlobalPkAddF16Insts();
+ bool HasAtomicDsPkAdd16Insts = ST->hasAtomicDsPkAdd16Insts();
+ addRulesForGOpcs({G_ATOMICRMW_FADD})
+ .Any({{DivS32, P0, S32}, {{Vgpr32}, {VgprP0, Vgpr32}}})
+ .Any({{DivS64, P0, S64}, {{Vgpr64}, {VgprP0, Vgpr64}}})
+ .Any({{DivS32, P1, S32}, {{Vgpr32}, {VgprP1, Vgpr32}}})
+ .Any({{DivS64, P1, S64}, {{Vgpr64}, {VgprP1, Vgpr64}}})
+ .Any({{DivS32, P3, S32}, {{Vgpr32}, {VgprP3, Vgpr32}}})
+ .Any({{DivS64, P3, S64}, {{Vgpr64}, {VgprP3, Vgpr64}}})
+ .Any({{DivV2S16, P0, V2S16}, {{VgprV2S16}, {VgprP0, VgprV2S16}}},
+      HasAtomicFlatPkAdd16Insts)
+ .Any({{DivV2S16, P1, V2S16}, {{VgprV2S16}, {VgprP1, VgprV2S16}}},
+      HasAtomicBufferGlobalPkAddF16Insts)
+ .Any({{DivV2S16, P3, V2S16}, {{VgprV2S16}, {VgprP3, VgprV2S16}}},
+      HasAtomicDsPkAdd16Insts);
addRulesForGOpcs({G_ATOMIC_CMPXCHG})
.Any({{DivS32, P2}, {{Vgpr32}, {VgprP2, Vgpr32, Vgpr32}}})
@@ -1002,6 +1014,7 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
.Any({{B64, Ptr32}, {{}, {VgprB64, VgprPtr32}}})
.Any({{B96, Ptr32}, {{}, {VgprB96, VgprPtr32}}})
.Any({{B128, Ptr32}, {{}, {VgprB128, VgprPtr32}}});
+
// clang-format on
addRulesForGOpcs({G_AMDGPU_BUFFER_LOAD, G_AMDGPU_BUFFER_LOAD_FORMAT,
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f32.ll
index da25ac06be26d..5e87afa4461d1 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f32.ll
@@ -1,7 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX942 %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX11 %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX11 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx942 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX942 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX11 %s
define amdgpu_ps void @flat_atomic_fadd_f32_no_rtn_intrinsic(ptr %ptr, float %data) {
; GFX942-LABEL: name: flat_atomic_fadd_f32_no_rtn_intrinsic
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f64.ll
index bf3697924c22c..93968937a9942 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f64.ll
@@ -1,6 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx90a -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
define amdgpu_ps void @flat_atomic_fadd_f64_no_rtn_atomicrmw(ptr %ptr, double %data) {
; GFX90A_GFX942-LABEL: name: flat_atomic_fadd_f64_no_rtn_atomicrmw
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.v2f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.v2f16.ll
index c349051bcc954..b92e139d13fe2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.v2f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.v2f16.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX942 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx942 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX942 %s
define amdgpu_ps <2 x half> @flat_atomic_fadd_v2f16_rtn(ptr %ptr, <2 x half> %data) {
; GFX942-LABEL: name: flat_atomic_fadd_v2f16_rtn
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fp-atomics-gfx942.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fp-atomics-gfx942.ll
index 99c3765b0fd1c..eb0165d367b6e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fp-atomics-gfx942.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fp-atomics-gfx942.ll
@@ -1,5 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck %s -check-prefix=GFX942
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck %s -check-prefix=GFX942
+
+; =============================================================================
+; Flat atomic fadd - f32
+; =============================================================================
define amdgpu_kernel void @flat_atomic_fadd_f32_noret_pat(ptr %ptr) {
; GFX942-LABEL: flat_atomic_fadd_f32_noret_pat:
@@ -50,30 +54,67 @@ define float @flat_atomic_fadd_f32_rtn_pat(ptr %ptr, float %data) {
ret float %ret
}
-define <2 x half> @local_atomic_fadd_ret_v2f16_offset(ptr addrspace(3) %ptr, <2 x half> %val) {
-; GFX942-LABEL: local_atomic_fadd_ret_v2f16_offset:
+; =============================================================================
+; Flat atomic fadd - v2f16
+; =============================================================================
+
+define <2 x half> @flat_atomic_fadd_ret_v2f16_agent_offset(ptr %ptr, <2 x half> %val) {
+; GFX942-LABEL: flat_atomic_fadd_ret_v2f16_agent_offset:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ds_pk_add_rtn_f16 v0, v0, v1 offset:65532
-; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: buffer_wbl2 sc1
+; GFX942-NEXT: s_waitcnt vmcnt(0)
+; GFX942-NEXT: flat_atomic_pk_add_f16 v0, v[0:1], v2 offset:1024 sc0
+; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT: buffer_inv sc1
; GFX942-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr <2 x half>, ptr addrspace(3) %ptr, i32 16383
- %result = atomicrmw fadd ptr addrspace(3) %gep, <2 x half> %val seq_cst
+ %gep = getelementptr inbounds <2 x half>, ptr %ptr, i32 256
+ %result = atomicrmw fadd ptr %gep, <2 x half> %val syncscope("agent") seq_cst
ret <2 x half> %result
}
-define void @local_atomic_fadd_noret_v2f16_offset(ptr addrspace(3) %ptr, <2 x half> %val) {
-; GFX942-LABEL: local_atomic_fadd_noret_v2f16_offset:
+define void @flat_atomic_fadd_noret_v2f16_agent_offset(ptr %ptr, <2 x half> %val) {
+; GFX942-LABEL: flat_atomic_fadd_noret_v2f16_agent_offset:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ds_pk_add_f16 v0, v1 offset:65532
-; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: buffer_wbl2 sc1
+; GFX942-NEXT: s_waitcnt vmcnt(0)
+; GFX942-NEXT: flat_atomic_pk_add_f16 v[0:1], v2 offset:1024
+; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT: buffer_inv sc1
; GFX942-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr <2 x half>, ptr addrspace(3) %ptr, i32 16383
- %unused = atomicrmw fadd ptr addrspace(3) %gep, <2 x half> %val seq_cst
+ %gep = getelementptr inbounds <2 x half>, ptr %ptr, i32 256
+ %unused = atomicrmw fadd ptr %gep, <2 x half> %val syncscope("agent") seq_cst
ret void
}
+; =============================================================================
+; Global atomic fadd - f32
+; =============================================================================
+
+define amdgpu_ps void @global_atomic_fadd_f32_no_rtn_atomicrmw(ptr addrspace(1) %ptr, float %data) {
+; GFX942-LABEL: global_atomic_fadd_f32_no_rtn_atomicrmw:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: global_atomic_add_f32 v[0:1], v2, off
+; GFX942-NEXT: s_endpgm
+ %ret = atomicrmw fadd ptr addrspace(1) %ptr, float %data syncscope("wavefront") monotonic, !amdgpu.no.fine.grained.memory !0
+ ret void
+}
+
+define amdgpu_ps float @global_atomic_fadd_f32_rtn_atomicrmw(ptr addrspace(1) %ptr, float %data) {
+; GFX942-LABEL: global_atomic_fadd_f32_rtn_atomicrmw:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: global_atomic_add_f32 v0, v[0:1], v2, off sc0
+; GFX942-NEXT: s_waitcnt vmcnt(0)
+; GFX942-NEXT: ; return to shader part epilog
+ %ret = atomicrmw fadd ptr addrspace(1) %ptr, float %data syncscope("wavefront") monotonic, !amdgpu.no.fine.grained.memory !0
+ ret float %ret
+}
+
+; =============================================================================
+; Global atomic fadd - v2f16
+; =============================================================================
+
define <2 x half> @global_atomic_fadd_ret_v2f16_agent_offset(ptr addrspace(1) %ptr, <2 x half> %val) {
; GFX942-LABEL: global_atomic_fadd_ret_v2f16_agent_offset:
; GFX942: ; %bb.0:
@@ -104,36 +145,111 @@ define void @global_atomic_fadd_noret_v2f16_agent_offset(ptr addrspace(1) %ptr,
ret void
}
-define <2 x half> @flat_atomic_fadd_ret_v2f16_agent_offset(ptr %ptr, <2 x half> %val) {
-; GFX942-LABEL: flat_atomic_fadd_ret_v2f16_agent_offset:
+; =============================================================================
+; Global atomic fadd - f64
+; =============================================================================
+
+define amdgpu_ps void @global_atomic_fadd_f64_no_rtn_atomicrmw(ptr addrspace(1) %ptr, double %data) {
+; GFX942-LABEL: global_atomic_fadd_f64_no_rtn_atomicrmw:
; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: buffer_wbl2 sc1
+; GFX942-NEXT: global_atomic_add_f64 v[0:1], v[2:3], off
+; GFX942-NEXT: s_endpgm
+ %ret = atomicrmw fadd ptr addrspace(1) %ptr, double %data syncscope("wavefront") monotonic, !amdgpu.no.fine.grained.memory !0
+ ret void
+}
+
+define amdgpu_ps double @global_atomic_fadd_f64_rtn_atomicrmw(ptr addrspace(1) %ptr, double %data) {
+; GFX942-LABEL: global_atomic_fadd_f64_rtn_atomicrmw:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off sc0
; GFX942-NEXT: s_waitcnt vmcnt(0)
-; GFX942-NEXT: flat_atomic_pk_add_f16 v0, v[0:1], v2 offset:1024 sc0
-; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX942-NEXT: buffer_inv sc1
+; GFX942-NEXT: v_readfirstlane_b32 s0, v0
+; GFX942-NEXT: v_readfirstlane_b32 s1, v1
+; GFX942-NEXT: ; return to shader part epilog
+ %ret = atomicrmw fadd ptr addrspace(1) %ptr, double %data syncscope("wavefront") monotonic, !amdgpu.no.fine.grained.memory !0
+ ret double %ret
+}
+
+; =============================================================================
+; Local atomic fadd - f32
+; =============================================================================
+
+define amdgpu_ps void @local_atomic_fadd_f32_no_rtn_atomicrmw(ptr addrspace(3) %ptr, float %data) {
+; GFX942-LABEL: local_atomic_fadd_f32_no_rtn_atomicrmw:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: ds_add_f32 v0, v1
+; GFX942-NEXT: s_endpgm
+ %ret = atomicrmw fadd ptr addrspace(3) %ptr, float %data syncscope("wavefront") monotonic, !amdgpu.no.fine.grained.memory !0
+ ret void
+}
+
+define amdgpu_ps float @local_atomic_fadd_f32_rtn_atomicrmw(ptr addrspace(3) %ptr, float %data) {
+; GFX942-LABEL: local_atomic_fadd_f32_rtn_atomicrmw:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: ds_add_rtn_f32 v0, v0, v1
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: ; return to shader part epilog
+ %ret = atomicrmw fadd ptr addrspace(3) %ptr, float %data syncscope("wavefront") monotonic, !amdgpu.no.fine.grained.memory !0
+ ret float %ret
+}
+
+; =============================================================================
+; Local atomic fadd - v2f16
+; =============================================================================
+
+define <2 x half> @local_atomic_fadd_ret_v2f16_offset(ptr addrspace(3) %ptr, <2 x half> %val) {
+; GFX942-LABEL: local_atomic_fadd_ret_v2f16_offset:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ds_pk_add_rtn_f16 v0, v0, v1 offset:65532
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr inbounds <2 x half>, ptr %ptr, i32 256
- %result = atomicrmw fadd ptr %gep, <2 x half> %val syncscope("agent") seq_cst
+ %gep = getelementptr <2 x half>, ptr addrspace(3) %ptr, i32 16383
+ %result = atomicrmw fadd ptr addrspace(3) %gep, <2 x half> %val seq_cst
ret <2 x half> %result
}
-define void @flat_atomic_fadd_noret_v2f16_agent_offset(ptr %ptr, <2 x half> %val) {
-; GFX942-LABEL: flat_atomic_fadd_noret_v2f16_agent_offset:
+define void @local_atomic_fadd_noret_v2f16_offset(ptr addrspace(3) %ptr, <2 x half> %val) {
+; GFX942-LABEL: local_atomic_fadd_noret_v2f16_offset:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: buffer_wbl2 sc1
-; GFX942-NEXT: s_waitcnt vmcnt(0)
-; GFX942-NEXT: flat_atomic_pk_add_f16 v[0:1], v2 offset:1024
-; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX942-NEXT: buffer_inv sc1
+; GFX942-NEXT: ds_pk_add_f16 v0, v1 offset:65532
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr inbounds <2 x half>, ptr %ptr, i32 256
- %unused = atomicrmw fadd ptr %gep, <2 x half> %val syncscope("agent") seq_cst
+ %gep = getelementptr <2 x half>, ptr addrspace(3) %ptr, i32 16383
+ %unused = atomicrmw fadd ptr addrspace(3) %gep, <2 x half> %val seq_cst
ret void
}
+; =============================================================================
+; Local atomic fadd - f64
+; =============================================================================
+
+define amdgpu_ps void @local_atomic_fadd_f64_no_rtn_atomicrmw(ptr addrspace(3) %ptr, double %data) {
+; GFX942-LABEL: local_atomic_fadd_f64_no_rtn_atomicrmw:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: v_mov_b32_e32 v5, v2
+; GFX942-NEXT: ds_add_f64 v0, v[4:5]
+; GFX942-NEXT: s_endpgm
+ %ret = atomicrmw fadd ptr addrspace(3) %ptr, double %data syncscope("wavefront") monotonic, !amdgpu.no.fine.grained.memory !0
+ ret void
+}
+
+define amdgpu_ps double @local_atomic_fadd_f64_rtn_atomicrmw(ptr addrspace(3) %ptr, double %data) {
+; GFX942-LABEL: local_atomic_fadd_f64_rtn_atomicrmw:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: v_mov_b32_e32 v5, v2
+; GFX942-NEXT: ds_add_rtn_f64 v[0:1], v0, v[4:5]
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: v_readfirstlane_b32 s0, v0
+; GFX942-NEXT: v_readfirstlane_b32 s1, v1
+; GFX942-NEXT: ; return to shader part epilog
+ %ret = atomicrmw fadd ptr addrspace(3) %ptr, double %data syncscope("wavefront") monotonic, !amdgpu.no.fine.grained.memory !0
+ ret double %ret
+}
+
attributes #0 = { denormal_fpenv(float: ieee|ieee) }
!0 = !{}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-no-rtn.ll
index 9c0db4cd162fc..60dcd014a3c8c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-no-rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-no-rtn.ll
@@ -1,7 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx908 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX908 %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx908 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX908 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx90a -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
define amdgpu_ps void @global_atomic_fadd_v2f16_no_rtn(ptr addrspace(1) %ptr, <2 x half> %data) {
; GFX908-LABEL: name: global_atomic_fadd_v2f16_no_rtn
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-rtn.ll
index 62620a8875a3a..1c680ec154e2a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-rtn.ll
@@ -1,6 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx942 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
define amdgpu_ps <2 x half> @global_atomic_fadd_v2f16_rtn(ptr addrspace(1) %ptr, <2 x half> %data) {
; GFX90A_GFX942-LABEL: name: global_atomic_fadd_v2f16_rtn
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-fadd.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-fadd.mir
index 11833cab3c07f..8e0ff35a8afd4 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-fadd.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-fadd.mir
@@ -1,15 +1,198 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" %s -verify-machineinstrs -o - | FileCheck %s
---
-name: atomicrmw_fadd_local_i32_ss
+name: atomicrmw_fadd_global_f32_ss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1, $sgpr2
+ ; CHECK-LABEL: name: atomicrmw_fadd_global_f32_ss
+ ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_FADD [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (s32), addrspace 1)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ATOMICRMW_FADD]], [[ATOMICRMW_FADD]]
+ %0:_(p1) = COPY $sgpr0_sgpr1
+ %1:_(s32) = COPY $sgpr2
+ %2:_(s32) = G_ATOMICRMW_FADD %0, %1 :: (load store seq_cst (s32), addrspace 1)
+ %3:_(s32) = G_AND %2, %2
+...
+
+---
+name: atomicrmw_fadd_global_f32_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2
+ ; CHECK-LABEL: name: atomicrmw_fadd_global_f32_vv
+ ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_FADD [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ATOMICRMW_FADD]], [[ATOMICRMW_FADD]]
+ %0:_(p1) = COPY $vgpr0_vgpr1
+ %1:_(s32) = COPY $vgpr2
+ %2:_(s32) = G_ATOMICRMW_FADD %0, %1 :: (load store seq_cst (s32), addrspace 1)
+ %3:_(s32) = G_AND %2, %2
+...
+
+---
+name: atomicrmw_fadd_global_f64_ss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+ ; CHECK-LABEL: name: atomicrmw_fadd_global_f64_ss
+ ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64)
+ ; CHECK-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_FADD [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (s64), addrspace 1)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_FADD]](s64)
+ ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_FADD]](s64)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]]
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32)
+ %0:_(p1) = COPY $sgpr0_sgpr1
+ %1:_(s64) = COPY $sgpr2_sgpr3
+ %2:_(s64) = G_ATOMICRMW_FADD %0, %1 :: (load store seq_cst (s64), addrspace 1)
+ %3:_(s64) = G_AND %2, %2
+...
+
+---
+name: atomicrmw_fadd_global_f64_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+ ; CHECK-LABEL: name: atomicrmw_fadd_global_f64_vv
+ ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
+ ; CHECK-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_FADD [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s64), addrspace 1)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_FADD]](s64)
+ ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_FADD]](s64)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]]
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32)
+ %0:_(p1) = COPY $vgpr0_vgpr1
+ %1:_(s64) = COPY $vgpr2_vgpr3
+ %2:_(s64) = G_ATOMICRMW_FADD %0, %1 :: (load store seq_cst (s64), addrspace 1)
+ %3:_(s64) = G_AND %2, %2
+...
+
+---
+name: atomicrmw_fadd_flat_f32_ss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1, $sgpr2
+ ; CHECK-LABEL: name: atomicrmw_fadd_flat_f32_ss
+ ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p0) = COPY $sgpr0_sgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_FADD [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (s32))
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ATOMICRMW_FADD]], [[ATOMICRMW_FADD]]
+ %0:_(p0) = COPY $sgpr0_sgpr1
+ %1:_(s32) = COPY $sgpr2
+ %2:_(s32) = G_ATOMICRMW_FADD %0, %1 :: (load store seq_cst (s32), addrspace 0)
+ %3:_(s32) = G_AND %2, %2
+...
+
+---
+name: atomicrmw_fadd_flat_f32_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2
+ ; CHECK-LABEL: name: atomicrmw_fadd_flat_f32_vv
+ ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_FADD [[COPY]](p0), [[COPY1]] :: (load store seq_cst (s32))
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ATOMICRMW_FADD]], [[ATOMICRMW_FADD]]
+ %0:_(p0) = COPY $vgpr0_vgpr1
+ %1:_(s32) = COPY $vgpr2
+ %2:_(s32) = G_ATOMICRMW_FADD %0, %1 :: (load store seq_cst (s32), addrspace 0)
+ %3:_(s32) = G_AND %2, %2
+...
+
+---
+name: atomicrmw_fadd_flat_f64_ss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+ ; CHECK-LABEL: name: atomicrmw_fadd_flat_f64_ss
+ ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p0) = COPY $sgpr0_sgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64)
+ ; CHECK-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_FADD [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (s64))
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_FADD]](s64)
+ ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_FADD]](s64)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]]
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32)
+ %0:_(p0) = COPY $sgpr0_sgpr1
+ %1:_(s64) = COPY $sgpr2_sgpr3
+ %2:_(s64) = G_ATOMICRMW_FADD %0, %1 :: (load store seq_cst (s64), addrspace 0)
+ %3:_(s64) = G_AND %2, %2
+...
+
+---
+name: atomicrmw_fadd_flat_f64_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+ ; CHECK-LABEL: name: atomicrmw_fadd_flat_f64_vv
+ ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
+ ; CHECK-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_FADD [[COPY]](p0), [[COPY1]] :: (load store seq_cst (s64))
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_FADD]](s64)
+ ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_FADD]](s64)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]]
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32)
+ %0:_(p0) = COPY $vgpr0_vgpr1
+ %1:_(s64) = COPY $vgpr2_vgpr3
+ %2:_(s64) = G_ATOMICRMW_FADD %0, %1 :: (load store seq_cst (s64), addrspace 0)
+ %3:_(s64) = G_AND %2, %2
+...
+
+---
+name: atomicrmw_fadd_local_f32_ss
legalized: true
body: |
bb.0:
liveins: $sgpr0, $sgpr1
- ; CHECK-LABEL: name: atomicrmw_fadd_local_i32_ss
+ ; CHECK-LABEL: name: atomicrmw_fadd_local_f32_ss
; CHECK: liveins: $sgpr0, $sgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0
@@ -17,7 +200,79 @@ body: |
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3)
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_FADD [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (s32), addrspace 3)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ATOMICRMW_FADD]], [[ATOMICRMW_FADD]]
%0:_(p3) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s32) = G_ATOMICRMW_FADD %0, %1 :: (load store seq_cst (s32), addrspace 3)
+ %3:_(s32) = G_AND %2, %2
+...
+
+---
+name: atomicrmw_fadd_local_f32_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ; CHECK-LABEL: name: atomicrmw_fadd_local_f32_vv
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_FADD [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ATOMICRMW_FADD]], [[ATOMICRMW_FADD]]
+ %0:_(p3) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32) = G_ATOMICRMW_FADD %0, %1 :: (load store seq_cst (s32), addrspace 3)
+ %3:_(s32) = G_AND %2, %2
+...
+
+---
+name: atomicrmw_fadd_local_f64_ss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr2_sgpr3
+ ; CHECK-LABEL: name: atomicrmw_fadd_local_f64_ss
+ ; CHECK: liveins: $sgpr0, $sgpr2_sgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64)
+ ; CHECK-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_FADD [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (s64), addrspace 3)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_FADD]](s64)
+ ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_FADD]](s64)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]]
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32)
+ %0:_(p3) = COPY $sgpr0
+ %1:_(s64) = COPY $sgpr2_sgpr3
+ %2:_(s64) = G_ATOMICRMW_FADD %0, %1 :: (load store seq_cst (s64), addrspace 3)
+ %3:_(s64) = G_AND %2, %2
+...
+
+---
+name: atomicrmw_fadd_local_f64_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr2_vgpr3
+ ; CHECK-LABEL: name: atomicrmw_fadd_local_f64_vv
+ ; CHECK: liveins: $vgpr0, $vgpr2_vgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
+ ; CHECK-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_FADD [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s64), addrspace 3)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_FADD]](s64)
+ ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ATOMICRMW_FADD]](s64)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]]
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32)
+ %0:_(p3) = COPY $vgpr0
+ %1:_(s64) = COPY $vgpr2_vgpr3
+ %2:_(s64) = G_ATOMICRMW_FADD %0, %1 :: (load store seq_cst (s64), addrspace 3)
+ %3:_(s64) = G_AND %2, %2
...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-fadd.v2f16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-fadd.v2f16.mir
new file mode 100644
index 0000000000000..9d1476e05a0cd
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-fadd.v2f16.mir
@@ -0,0 +1,66 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn -mcpu=gfx942 -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" %s -verify-machineinstrs -o - | FileCheck %s
+
+# Test G_ATOMICRMW_FADD with V2F16 (V2S16) type - register bank selection and
+# legalization for flat, global, and local address spaces.
+# Requires: hasAtomicFlatPkAdd16Insts, hasAtomicBufferGlobalPkAddF16*, hasAtomicDsPkAdd16Insts
+
+---
+name: atomicrmw_fadd_flat_v2f16_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2
+ ; CHECK-LABEL: name: atomicrmw_fadd_flat_v2f16_vv
+ ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2
+ ; CHECK-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr(<2 x s16>) = G_ATOMICRMW_FADD [[COPY]](p0), [[COPY1]] :: (load store seq_cst (<2 x s16>))
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[ATOMICRMW_FADD]], [[ATOMICRMW_FADD]]
+ %0:_(p0) = COPY $vgpr0_vgpr1
+ %1:_(<2 x s16>) = COPY $vgpr2
+ %2:_(<2 x s16>) = G_ATOMICRMW_FADD %0, %1 :: (load store seq_cst (<2 x s16>), addrspace 0)
+ %3:_(<2 x s16>) = G_AND %2, %2
+...
+
+---
+name: atomicrmw_fadd_global_v2f16_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2
+ ; CHECK-LABEL: name: atomicrmw_fadd_global_v2f16_vv
+ ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2
+ ; CHECK-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr(<2 x s16>) = G_ATOMICRMW_FADD [[COPY]](p1), [[COPY1]] :: (load store seq_cst (<2 x s16>), addrspace 1)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[ATOMICRMW_FADD]], [[ATOMICRMW_FADD]]
+ %0:_(p1) = COPY $vgpr0_vgpr1
+ %1:_(<2 x s16>) = COPY $vgpr2
+ %2:_(<2 x s16>) = G_ATOMICRMW_FADD %0, %1 :: (load store seq_cst (<2 x s16>), addrspace 1)
+ %3:_(<2 x s16>) = G_AND %2, %2
+...
+
+---
+name: atomicrmw_fadd_local_v2f16_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ; CHECK-LABEL: name: atomicrmw_fadd_local_v2f16_vv
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1
+ ; CHECK-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr(<2 x s16>) = G_ATOMICRMW_FADD [[COPY]](p3), [[COPY1]] :: (load store seq_cst (<2 x s16>), addrspace 3)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[ATOMICRMW_FADD]], [[ATOMICRMW_FADD]]
+ %0:_(p3) = COPY $vgpr0
+ %1:_(<2 x s16>) = COPY $vgpr1
+ %2:_(<2 x s16>) = G_ATOMICRMW_FADD %0, %1 :: (load store seq_cst (<2 x s16>), addrspace 3)
+ %3:_(<2 x s16>) = G_AND %2, %2
+...