[llvm] [AMDGPU] Handle AV classes in SIFixSGPRCopies::processPHINode (PR #169038)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 21 05:45:06 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Jay Foad (jayfoad)
<details>
<summary>Changes</summary>
Fix a problem exposed by #166483, which uses AV classes in more places.
`isVectorRegister` only accepts registers of VGPR or AGPR classes.
`hasVectorRegisters` additionally accepts the combined AV classes.
Fixes: #168761
---
Patch is 266.59 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/169038.diff
9 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/a-v-flat-atomicrmw.ll (+315-327)
- (modified) llvm/test/CodeGen/AMDGPU/av-split-dead-valno-crash.ll (+40-37)
- (modified) llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll (+24-24)
- (modified) llvm/test/CodeGen/AMDGPU/masked-load-vectortypes.ll (+2-1)
- (modified) llvm/test/CodeGen/AMDGPU/mfma-loop.ll (+2040-838)
- (modified) llvm/test/CodeGen/AMDGPU/no-fold-accvgpr-mov.ll (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll (+34-38)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index e1647b76702c4..3e4b25dd2f663 100644
--- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -856,7 +856,7 @@ void SIFixSGPRCopies::processPHINode(MachineInstr &MI) {
}
}
- if (TRI->isVectorRegister(*MRI, PHIRes) ||
+ if (TRI->hasVectorRegisters(MRI->getRegClass(PHIRes)) ||
RC0 == &AMDGPU::VReg_1RegClass) {
LLVM_DEBUG(dbgs() << "Legalizing PHI: " << MI);
TII->legalizeOperands(MI, MDT);
diff --git a/llvm/test/CodeGen/AMDGPU/a-v-flat-atomicrmw.ll b/llvm/test/CodeGen/AMDGPU/a-v-flat-atomicrmw.ll
index 5cceb918b755e..b8962fa29e8f1 100644
--- a/llvm/test/CodeGen/AMDGPU/a-v-flat-atomicrmw.ll
+++ b/llvm/test/CodeGen/AMDGPU/a-v-flat-atomicrmw.ll
@@ -926,12 +926,12 @@ define void @flat_atomic_xchg_i64_ret_av_av(ptr %ptr) #0 {
; GFX90A-LABEL: flat_atomic_xchg_i64_ret_av_av:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_add_co_u32_e32 v4, vcc, 0x50, v0
+; GFX90A-NEXT: v_add_co_u32_e32 v2, vcc, 0x50, v0
; GFX90A-NEXT: s_mov_b64 s[4:5], src_private_base
-; GFX90A-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
-; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s5, v5
+; GFX90A-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v1, vcc
+; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s5, v3
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:3]
+; GFX90A-NEXT: ; def v[4:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], vcc
@@ -939,23 +939,23 @@ define void @flat_atomic_xchg_i64_ret_av_av(ptr %ptr) #0 {
; GFX90A-NEXT: s_cbranch_execz .LBB14_2
; GFX90A-NEXT: ; %bb.1: ; %atomicrmw.global
; GFX90A-NEXT: buffer_wbl2
-; GFX90A-NEXT: flat_atomic_swap_x2 v[0:1], v[4:5], v[2:3] glc
+; GFX90A-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3], v[4:5] glc
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-NEXT: buffer_invl2
; GFX90A-NEXT: buffer_wbinvl1_vol
-; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5
; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5
; GFX90A-NEXT: .LBB14_2: ; %Flow
; GFX90A-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5]
; GFX90A-NEXT: s_cbranch_execz .LBB14_4
; GFX90A-NEXT: ; %bb.3: ; %atomicrmw.private
-; GFX90A-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5]
-; GFX90A-NEXT: v_cndmask_b32_e32 v4, -1, v4, vcc
-; GFX90A-NEXT: buffer_load_dword v0, v4, s[0:3], 0 offen
-; GFX90A-NEXT: buffer_load_dword v1, v4, s[0:3], 0 offen offset:4
+; GFX90A-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3]
+; GFX90A-NEXT: v_cndmask_b32_e32 v2, -1, v2, vcc
+; GFX90A-NEXT: buffer_load_dword v0, v2, s[0:3], 0 offen
+; GFX90A-NEXT: buffer_load_dword v1, v2, s[0:3], 0 offen offset:4
; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: buffer_store_dword v2, v4, s[0:3], 0 offen
-; GFX90A-NEXT: buffer_store_dword v3, v4, s[0:3], 0 offen offset:4
+; GFX90A-NEXT: buffer_store_dword v4, v2, s[0:3], 0 offen
+; GFX90A-NEXT: buffer_store_dword v5, v2, s[0:3], 0 offen offset:4
; GFX90A-NEXT: .LBB14_4: ; %atomicrmw.phi
; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX90A-NEXT: s_waitcnt vmcnt(2)
@@ -1016,12 +1016,12 @@ define void @flat_atomic_xchg_i64_ret_av_v(ptr %ptr) #0 {
; GFX90A-LABEL: flat_atomic_xchg_i64_ret_av_v:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_add_co_u32_e32 v4, vcc, 0x50, v0
+; GFX90A-NEXT: v_add_co_u32_e32 v2, vcc, 0x50, v0
; GFX90A-NEXT: s_mov_b64 s[4:5], src_private_base
-; GFX90A-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
-; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s5, v5
+; GFX90A-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v1, vcc
+; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s5, v3
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:3]
+; GFX90A-NEXT: ; def v[4:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], vcc
@@ -1029,23 +1029,23 @@ define void @flat_atomic_xchg_i64_ret_av_v(ptr %ptr) #0 {
; GFX90A-NEXT: s_cbranch_execz .LBB15_2
; GFX90A-NEXT: ; %bb.1: ; %atomicrmw.global
; GFX90A-NEXT: buffer_wbl2
-; GFX90A-NEXT: flat_atomic_swap_x2 v[0:1], v[4:5], v[2:3] glc
+; GFX90A-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3], v[4:5] glc
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-NEXT: buffer_invl2
; GFX90A-NEXT: buffer_wbinvl1_vol
-; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5
; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5
; GFX90A-NEXT: .LBB15_2: ; %Flow
; GFX90A-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5]
; GFX90A-NEXT: s_cbranch_execz .LBB15_4
; GFX90A-NEXT: ; %bb.3: ; %atomicrmw.private
-; GFX90A-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5]
-; GFX90A-NEXT: v_cndmask_b32_e32 v4, -1, v4, vcc
-; GFX90A-NEXT: buffer_load_dword v0, v4, s[0:3], 0 offen
-; GFX90A-NEXT: buffer_load_dword v1, v4, s[0:3], 0 offen offset:4
+; GFX90A-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3]
+; GFX90A-NEXT: v_cndmask_b32_e32 v2, -1, v2, vcc
+; GFX90A-NEXT: buffer_load_dword v0, v2, s[0:3], 0 offen
+; GFX90A-NEXT: buffer_load_dword v1, v2, s[0:3], 0 offen offset:4
; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: buffer_store_dword v2, v4, s[0:3], 0 offen
-; GFX90A-NEXT: buffer_store_dword v3, v4, s[0:3], 0 offen offset:4
+; GFX90A-NEXT: buffer_store_dword v4, v2, s[0:3], 0 offen
+; GFX90A-NEXT: buffer_store_dword v5, v2, s[0:3], 0 offen offset:4
; GFX90A-NEXT: .LBB15_4: ; %atomicrmw.phi
; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX90A-NEXT: s_waitcnt vmcnt(2)
@@ -1294,12 +1294,12 @@ define void @flat_atomic_xchg_i64_ret_v_av(ptr %ptr) #0 {
; GFX90A-LABEL: flat_atomic_xchg_i64_ret_v_av:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_add_co_u32_e32 v4, vcc, 0x50, v0
+; GFX90A-NEXT: v_add_co_u32_e32 v2, vcc, 0x50, v0
; GFX90A-NEXT: s_mov_b64 s[4:5], src_private_base
-; GFX90A-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
-; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s5, v5
+; GFX90A-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v1, vcc
+; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s5, v3
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:3]
+; GFX90A-NEXT: ; def v[4:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], vcc
@@ -1307,23 +1307,23 @@ define void @flat_atomic_xchg_i64_ret_v_av(ptr %ptr) #0 {
; GFX90A-NEXT: s_cbranch_execz .LBB18_2
; GFX90A-NEXT: ; %bb.1: ; %atomicrmw.global
; GFX90A-NEXT: buffer_wbl2
-; GFX90A-NEXT: flat_atomic_swap_x2 v[0:1], v[4:5], v[2:3] glc
+; GFX90A-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3], v[4:5] glc
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-NEXT: buffer_invl2
; GFX90A-NEXT: buffer_wbinvl1_vol
-; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5
; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5
; GFX90A-NEXT: .LBB18_2: ; %Flow
; GFX90A-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5]
; GFX90A-NEXT: s_cbranch_execz .LBB18_4
; GFX90A-NEXT: ; %bb.3: ; %atomicrmw.private
-; GFX90A-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5]
-; GFX90A-NEXT: v_cndmask_b32_e32 v4, -1, v4, vcc
-; GFX90A-NEXT: buffer_load_dword v0, v4, s[0:3], 0 offen
-; GFX90A-NEXT: buffer_load_dword v1, v4, s[0:3], 0 offen offset:4
+; GFX90A-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3]
+; GFX90A-NEXT: v_cndmask_b32_e32 v2, -1, v2, vcc
+; GFX90A-NEXT: buffer_load_dword v0, v2, s[0:3], 0 offen
+; GFX90A-NEXT: buffer_load_dword v1, v2, s[0:3], 0 offen offset:4
; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: buffer_store_dword v2, v4, s[0:3], 0 offen
-; GFX90A-NEXT: buffer_store_dword v3, v4, s[0:3], 0 offen offset:4
+; GFX90A-NEXT: buffer_store_dword v4, v2, s[0:3], 0 offen
+; GFX90A-NEXT: buffer_store_dword v5, v2, s[0:3], 0 offen offset:4
; GFX90A-NEXT: .LBB18_4: ; %atomicrmw.phi
; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX90A-NEXT: s_waitcnt vmcnt(2)
@@ -6406,35 +6406,35 @@ define void @flat_atomic_add_i64_ret_av_av(ptr %ptr) #0 {
; GFX90A-LABEL: flat_atomic_add_i64_ret_av_av:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_add_co_u32_e32 v4, vcc, 0x50, v0
+; GFX90A-NEXT: v_add_co_u32_e32 v2, vcc, 0x50, v0
; GFX90A-NEXT: s_mov_b64 s[4:5], src_private_base
-; GFX90A-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
-; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s5, v5
+; GFX90A-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v1, vcc
+; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s5, v3
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:3]
+; GFX90A-NEXT: ; def v[4:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX90A-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
; GFX90A-NEXT: s_cbranch_execz .LBB90_2
; GFX90A-NEXT: ; %bb.1: ; %atomicrmw.global
-; GFX90A-NEXT: flat_atomic_add_x2 v[0:1], v[4:5], v[2:3] glc
+; GFX90A-NEXT: flat_atomic_add_x2 v[0:1], v[2:3], v[4:5] glc
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
-; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5
; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5
; GFX90A-NEXT: .LBB90_2: ; %Flow
; GFX90A-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5]
; GFX90A-NEXT: s_cbranch_execz .LBB90_4
; GFX90A-NEXT: ; %bb.3: ; %atomicrmw.private
-; GFX90A-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5]
-; GFX90A-NEXT: v_cndmask_b32_e32 v4, -1, v4, vcc
-; GFX90A-NEXT: buffer_load_dword v0, v4, s[0:3], 0 offen
-; GFX90A-NEXT: buffer_load_dword v1, v4, s[0:3], 0 offen offset:4
+; GFX90A-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3]
+; GFX90A-NEXT: v_cndmask_b32_e32 v2, -1, v2, vcc
+; GFX90A-NEXT: buffer_load_dword v0, v2, s[0:3], 0 offen
+; GFX90A-NEXT: buffer_load_dword v1, v2, s[0:3], 0 offen offset:4
; GFX90A-NEXT: s_waitcnt vmcnt(0)
-; GFX90A-NEXT: v_add_co_u32_e32 v2, vcc, v0, v2
-; GFX90A-NEXT: v_addc_co_u32_e32 v3, vcc, v1, v3, vcc
-; GFX90A-NEXT: buffer_store_dword v2, v4, s[0:3], 0 offen
-; GFX90A-NEXT: buffer_store_dword v3, v4, s[0:3], 0 offen offset:4
+; GFX90A-NEXT: v_add_co_u32_e32 v3, vcc, v0, v4
+; GFX90A-NEXT: v_addc_co_u32_e32 v4, vcc, v1, v5, vcc
+; GFX90A-NEXT: buffer_store_dword v3, v2, s[0:3], 0 offen
+; GFX90A-NEXT: buffer_store_dword v4, v2, s[0:3], 0 offen offset:4
; GFX90A-NEXT: .LBB90_4: ; %atomicrmw.phi
; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
@@ -6591,35 +6591,35 @@ define void @flat_atomic_sub_i64_ret_av_av(ptr %ptr) #0 {
; GFX90A-LABEL: flat_atomic_sub_i64_ret_av_av:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_add_co_u32_e32 v4, vcc, 0x50, v0
+; GFX90A-NEXT: v_add_co_u32_e32 v2, vcc, 0x50, v0
; GFX90A-NEXT: s_mov_b64 s[4:5], src_private_base
-; GFX90A-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
-; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s5, v5
+; GFX90A-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v1, vcc
+; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s5, v3
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:3]
+; GFX90A-NEXT: ; def v[4:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX90A-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
; GFX90A-NEXT: s_cbranch_execz .LBB92_2
; GFX90A-NEXT: ; %bb.1: ; %atomicrmw.global
-; GFX90A-NEXT: flat_atomic_sub_x2 v[0:1], v[4:5], v[2:3] glc
+; GFX90A-NEXT: flat_atomic_sub_x2 v[0:1], v[2:3], v[4:5] glc
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
-; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5
; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5
; GFX90A-NEXT: .LBB92_2: ; %Flow
; GFX90A-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5]
; GFX90A-NEXT: s_cbranch_execz .LBB92_4
; GFX90A-NEXT: ; %bb.3: ; %atomicrmw.private
-; GFX90A-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5]
-; GFX90A-NEXT: v_cndmask_b32_e32 v4, -1, v4, vcc
-; GFX90A-NEXT: buffer_load_dword v0, v4, s[0:3], 0 offen
-; GFX90A-NEXT: buffer_load_dword v1, v4, s[0:3], 0 offen offset:4
+; GFX90A-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3]
+; GFX90A-NEXT: v_cndmask_b32_e32 v2, -1, v2, vcc
+; GFX90A-NEXT: buffer_load_dword v0, v2, s[0:3], 0 offen
+; GFX90A-NEXT: buffer_load_dword v1, v2, s[0:3], 0 offen offset:4
; GFX90A-NEXT: s_waitcnt vmcnt(0)
-; GFX90A-NEXT: v_sub_co_u32_e32 v2, vcc, v0, v2
-; GFX90A-NEXT: v_subb_co_u32_e32 v3, vcc, v1, v3, vcc
-; GFX90A-NEXT: buffer_store_dword v2, v4, s[0:3], 0 offen
-; GFX90A-NEXT: buffer_store_dword v3, v4, s[0:3], 0 offen offset:4
+; GFX90A-NEXT: v_sub_co_u32_e32 v3, vcc, v0, v4
+; GFX90A-NEXT: v_subb_co_u32_e32 v4, vcc, v1, v5, vcc
+; GFX90A-NEXT: buffer_store_dword v3, v2, s[0:3], 0 offen
+; GFX90A-NEXT: buffer_store_dword v4, v2, s[0:3], 0 offen offset:4
; GFX90A-NEXT: .LBB92_4: ; %atomicrmw.phi
; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
@@ -8881,28 +8881,28 @@ define void @flat_atomic_usub_sat_i64_ret_av_av(ptr %ptr) #0 {
; GFX90A-LABEL: flat_atomic_usub_sat_i64_ret_av_av:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_add_co_u32_e32 v6, vcc, 0x50, v0
+; GFX90A-NEXT: v_add_co_u32_e32 v4, vcc, 0x50, v0
; GFX90A-NEXT: s_mov_b64 s[4:5], src_private_base
-; GFX90A-NEXT: v_addc_co_u32_e32 v7, vcc, 0, v1, vcc
-; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s5, v7
+; GFX90A-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
+; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s5, v5
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:5]
+; GFX90A-NEXT: ; def v[6:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX90A-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
; GFX90A-NEXT: s_cbranch_execz .LBB114_4
; GFX90A-NEXT: ; %bb.1: ; %atomicrmw.global
-; GFX90A-NEXT: flat_load_dwordx2 v[2:3], v[6:7]
+; GFX90A-NEXT: flat_load_dwordx2 v[2:3], v[4:5]
; GFX90A-NEXT: s_mov_b64 s[6:7], 0
; GFX90A-NEXT: .LBB114_2: ; %atomicrmw.start
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_sub_co_u32_e32 v0, vcc, v2, v4
-; GFX90A-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v5, vcc
+; GFX90A-NEXT: v_sub_co_u32_e32 v0, vcc, v2, v6
+; GFX90A-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v7, vcc
; GFX90A-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
; GFX90A-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[6:7], v[0:3] glc
+; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
; GFX90A-NEXT: s_or_b64 s[6:7], vcc, s[6:7]
@@ -8911,20 +8911,20 @@ define void @flat_atomic_usub_sat_i64_ret_av_av(ptr %ptr) #0 {
; GFX90A-NEXT: s_cbranch_execnz .LBB114_2
; GFX90A-NEXT: ; %bb.3: ; %Flow
; GFX90A-NEXT: s_or_b64 exec, exec, s[6:7]
-; GFX90A-NEXT: ; implicit-def: $vgpr6_vgpr7
; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5
+; GFX90A-NEXT: ; implicit-def: $vgpr6_vgpr7
; GFX90A-NEXT: .LBB114_4: ; %Flow3
; GFX90A-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5]
; GFX90A-NEXT: s_cbranch_execz .LBB114_6
; GFX90A-NEXT: ; %bb.5: ; %atomicrmw.private
-; GFX90A-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[6:7]
-; GFX90A-NEXT: v_cndmask_b32_e32 v2, -1, v6, vcc
+; GFX90A-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5]
+; GFX90A-NEXT: v_cndmask_b32_e32 v2, -1, v4, vcc
; GFX90A-NEXT: buffer_load_dword v0, v2, s[0:3], 0 offen
; GFX90A-NEXT: buffer_load_dword v1, v2, s[0:3], 0 offen offset:4
; GFX90A-NEXT: s_waitcnt vmcnt(1)
-; GFX90A-NEXT: v_sub_co_u32_e32 v3, vcc, v0, v4
+; GFX90A-NEXT: v_sub_co_u32_e32 v3, vcc, v0, v6
; GFX90A-NEXT: s_waitcnt vmcnt(0)
-; GFX90A-NEXT: v_subb_co_u32_e32 v4, vcc, v1, v5, vcc
+; GFX90A-NEXT: v_subb_co_u32_e32 v4, vcc, v1, v7, vcc
; GFX90A-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc
; GFX90A-NEXT: v_cndmask_b32_e64 v4, v4, 0, vcc
; GFX90A-NEXT: buffer_store_dword v3, v2, s[0:3], 0 offen
@@ -9027,29 +9027,28 @@ define void @flat_atomic_fadd_f32_ret_a_a(ptr %ptr) #0 {
; GFX90A-NEXT: ; %bb.1: ; %atomicrmw.check.private
; GFX90A-NEXT: s_mov_b64 s[6:7], src_private_base
; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s7, v1
-; GFX90A-NEXT: ; implicit-def: $agpr0
+; GFX90A-NEXT: ; implicit-def: $vgpr3
; GFX90A-NEXT: s_and_saveexec_b64 s[6:7], vcc
; GFX90A-NEXT: s_xor_b64 s[6:7], exec, s[6:7]
; GFX90A-NEXT: s_cbranch_execz .LBB115_3
; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.global
-; GFX90A-NEXT: global_atomic_add_f32 v0, v[0:1], v2, off glc
-; GFX90A-NEXT: ; implicit-def: $vgpr2
-; GFX90A-NEXT: s_waitcnt vmcnt(0)
-; GFX90A-NEXT: v_accvgpr_write_b32 a0, v0
+; GFX90A-NEXT: global_atomic_add_f32 v3, v[0:1], v2, off glc
; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
+; GFX90A-NEXT: ; implicit-def: $vgpr2
; GFX90A-NEXT: .LBB115_3: ; %Flow
; GFX90A-NEXT: s_andn2_saveexec_b64 s[6:7], s[6:7]
; GFX90A-NEXT: s_cbranch_execz .LBB115_5
; GFX90A-NEXT: ; %bb.4: ; %atomicrmw.private
; GFX90A-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
; GFX90A-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc
-; GFX90A-NEXT: buffer_load_dword v1, v0, s[0:3], 0 offen
+; GFX90A-NEXT: buffer_load_dword v3, v0, s[0:3], 0 offen
; GFX90A-NEXT: s_waitcnt vmcnt(0)
-; GFX90A-NEXT: v_add_f32_e32 v2, v1, v2
-; GFX90A-NEXT: v_accvgpr_write_b32 a0, v1
-; GFX90A-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen
+; GFX90A-NEXT: v_add_f32_e32 v1, v3, v2
+; GFX90A-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
; GFX90A-NEXT: .LBB115_5: ; %Flow1
; GFX90A-NEXT: s_or_b64 exec, exec, s[6:7]
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: v_accvgpr_write_b32 a0, v3
; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: ; implicit-def: $vgpr2
; GFX90A-NEXT: .LBB115_6: ; %Flow2
@@ -9066,7 +9065,6 @@ define void @flat_atomic_fadd_f32_ret_a_a(ptr %ptr) #0 {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use a0
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: flat_atomic_fadd_f32_ret_a_a:
@@ -9829,33 +9827,31 @@ define void @flat_atomic_fadd_f64_ret_a_a(ptr %ptr) #0 {
; GFX90A-NEXT: ; %bb.1: ; %atomicrmw.check.private
; GFX90A-NEXT: s_mov_b64 s[6:7], src_private_base
; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s7, v1
-; GFX90A-NEXT: ; implicit-def: $agpr0_agpr1
+; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5
; GFX90A-NEXT: s_and_saveexec_b64 s[6:7], vcc
; GFX90A-NEXT: s_xor_b64 s[6:7], exec, s[6:7]
; GFX90A-NEXT: s_cbranch_execz .LBB127_3
; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.global
-; GFX90A-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off glc
-; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
-; GFX90A-NEXT: s_waitcnt vmcnt(0)
-; GFX90A-NEXT: v_accvgpr_write_b32 a0, v0
-; GFX90A-NEXT: v_accvgpr_write_b32 a1, v1
+; GFX90A-NEXT: global_atomic_add_f64 v[4:5], v[0:1], v[2:3], off glc
; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
+; GFX90A-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX90A-NEXT: .LBB127_3: ; %Flow
; GFX90A-NEXT: s_andn2_saveexec_b64 s[6:7], s[6:7]
; GFX90A-NEXT: s_cbranch_execz .LBB127_5
; GFX90A-NEXT: ; %bb.4: ; %atomicrmw.private
; GFX90A-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
-; GFX90A-NEXT: v_cndmask_b32_e32 v4, -1, v0, vcc
-; GFX90A-NEXT: buffer_load_dword v0, v4, s[0:3], 0 offen
-; GFX90A-NEXT: buffer_load_dword v1, v4, s[0:3], 0 offen offset:4
+; GFX90A-NEXT: v_cndmask_b32_e32 v6, -1, v0, vcc
+; GFX90A-NEXT: buffer_load_dword v4, v6, s[0:3], 0 offen
+; GFX90A-NEXT: buffer_load_dword v5, v6, s[0:3], 0 offen offset:4
; GFX90A-NEXT: s_waitcnt vmcnt(0)
-; GFX90A-NEXT: v_accvgpr_write_b32 a0, v0
-; GFX90A-NEXT: v_add_f64 v[2:3], v[0:1], v[2:3]
-; GFX90A-NEXT: v_accvgpr_write_...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/169038
More information about the llvm-commits mailing list