[llvm] RenameIndependentSubregs: Add missing sub-range for new IMPLICIT_DEFs (PR #89050)

Petar Avramovic via llvm-commits llvm-commits at lists.llvm.org
Fri Apr 19 08:43:30 PDT 2024


https://github.com/petar-avramovic updated https://github.com/llvm/llvm-project/pull/89050

>From 3e4959e9adfa35a62c1acc3baf5543a0bf9fae7f Mon Sep 17 00:00:00 2001
From: Petar Avramovic <Petar.Avramovic at amd.com>
Date: Wed, 17 Apr 2024 17:45:35 +0200
Subject: [PATCH 1/2] AMDGPU: precommit test for bug in
 RenameIndependentSubregs

---
 ...rval-bug-in-rename-independent-subregs.mir | 474 ++++++++++++++++++
 1 file changed, 474 insertions(+)
 create mode 100644 llvm/test/CodeGen/AMDGPU/live-interval-bug-in-rename-independent-subregs.mir

diff --git a/llvm/test/CodeGen/AMDGPU/live-interval-bug-in-rename-independent-subregs.mir b/llvm/test/CodeGen/AMDGPU/live-interval-bug-in-rename-independent-subregs.mir
new file mode 100644
index 00000000000000..9c8244ef1e38b8
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/live-interval-bug-in-rename-independent-subregs.mir
@@ -0,0 +1,474 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+# RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1010 -start-before=rename-independent-subregs -mattr=-wavefrontsize32,+wavefrontsize64 -stop-before=amdgpu-mark-last-scratch-load %s -o - | FileCheck -check-prefix=RegAlloc %s
+# RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1010 -start-before=rename-independent-subregs -mattr=-wavefrontsize32,+wavefrontsize64 -stop-after=machine-cp %s -o - | FileCheck -check-prefix=DeadInstDelete %s
+
+---
+name: _amdgpu_cs_main
+tracksRegLiveness: true
+body: |
+  ; RegAlloc-LABEL: name: _amdgpu_cs_main
+  ; RegAlloc: bb.0:
+  ; RegAlloc-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; RegAlloc-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $vgpr0, $vgpr1, $vgpr2
+  ; RegAlloc-NEXT: {{  $}}
+  ; RegAlloc-NEXT:   renamable $sgpr0 = COPY $sgpr3
+  ; RegAlloc-NEXT:   renamable $sgpr12_sgpr13 = COPY $exec
+  ; RegAlloc-NEXT:   renamable $sgpr8 = COPY $sgpr1
+  ; RegAlloc-NEXT:   renamable $sgpr9 = COPY $sgpr2
+  ; RegAlloc-NEXT:   renamable $sgpr2_sgpr3 = S_GETPC_B64_pseudo
+  ; RegAlloc-NEXT:   renamable $sgpr10_sgpr11 = S_MOV_B64_IMM_PSEUDO -4294967296
+  ; RegAlloc-NEXT:   renamable $sgpr2_sgpr3 = S_AND_B64 killed renamable $sgpr2_sgpr3, renamable $sgpr10_sgpr11, implicit-def dead $scc
+  ; RegAlloc-NEXT:   renamable $sgpr1 = COPY killed renamable $sgpr10
+  ; RegAlloc-NEXT:   renamable $sgpr10_sgpr11 = S_OR_B64 killed renamable $sgpr2_sgpr3, killed renamable $sgpr0_sgpr1, implicit-def dead $scc
+  ; RegAlloc-NEXT:   renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM renamable $sgpr10_sgpr11, 16, 0 :: (invariant load (<4 x s32>), addrspace 4)
+  ; RegAlloc-NEXT:   renamable $sgpr7 = IMPLICIT_DEF
+  ; RegAlloc-NEXT:   renamable $sgpr14 = S_LSHL_B32 renamable $sgpr7, 5, implicit-def dead $scc
+  ; RegAlloc-NEXT:   renamable $vgpr1 = COPY renamable $sgpr14
+  ; RegAlloc-NEXT:   renamable $vgpr1_vgpr2_vgpr3_vgpr4 = BUFFER_LOAD_DWORDX4_OFFEN killed renamable $vgpr1, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
+  ; RegAlloc-NEXT:   renamable $vgpr5 = V_MBCNT_LO_U32_B32_e64 -1, 0, implicit $exec
+  ; RegAlloc-NEXT:   renamable $vgpr16 = V_MBCNT_HI_U32_B32_e64 -1, killed $vgpr5, implicit $exec
+  ; RegAlloc-NEXT:   renamable $sgpr14 = S_OR_B32 killed renamable $sgpr14, 16, implicit-def dead $scc
+  ; RegAlloc-NEXT:   renamable $vgpr5 = V_LSHLREV_B32_e64 5, $vgpr16, implicit $exec
+  ; RegAlloc-NEXT:   renamable $vgpr6 = COPY killed renamable $sgpr14
+  ; RegAlloc-NEXT:   renamable $vgpr7 = V_OR_B32_e64 16, $vgpr5, implicit $exec
+  ; RegAlloc-NEXT:   renamable $vgpr12_vgpr13_vgpr14_vgpr15 = BUFFER_LOAD_DWORDX4_OFFEN killed renamable $vgpr6, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
+  ; RegAlloc-NEXT:   renamable $sgpr14 = V_READFIRSTLANE_B32 $vgpr1, implicit $exec
+  ; RegAlloc-NEXT:   renamable $vgpr8_vgpr9_vgpr10_vgpr11 = BUFFER_LOAD_DWORDX4_OFFEN killed renamable $vgpr5, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
+  ; RegAlloc-NEXT:   renamable $sgpr15 = V_READFIRSTLANE_B32 $vgpr2, implicit $exec
+  ; RegAlloc-NEXT:   renamable $sgpr16 = V_READFIRSTLANE_B32 $vgpr3, implicit $exec
+  ; RegAlloc-NEXT:   renamable $sgpr17 = V_READFIRSTLANE_B32 killed $vgpr4, implicit $exec
+  ; RegAlloc-NEXT:   renamable $vgpr4_vgpr5_vgpr6_vgpr7 = BUFFER_LOAD_DWORDX4_OFFEN killed renamable $vgpr7, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
+  ; RegAlloc-NEXT:   renamable $sgpr18 = V_READFIRSTLANE_B32 $vgpr8, implicit $exec
+  ; RegAlloc-NEXT:   renamable $sgpr19 = V_READFIRSTLANE_B32 $vgpr9, implicit $exec
+  ; RegAlloc-NEXT:   renamable $sgpr20 = V_READFIRSTLANE_B32 $vgpr10, implicit $exec
+  ; RegAlloc-NEXT:   renamable $sgpr21 = V_READFIRSTLANE_B32 $vgpr11, implicit $exec
+  ; RegAlloc-NEXT:   renamable $sgpr22 = V_READFIRSTLANE_B32 $vgpr4, implicit $exec
+  ; RegAlloc-NEXT:   renamable $sgpr23 = V_READFIRSTLANE_B32 $vgpr5, implicit $exec
+  ; RegAlloc-NEXT:   renamable $sgpr24 = V_READFIRSTLANE_B32 $vgpr6, implicit $exec
+  ; RegAlloc-NEXT:   renamable $vgpr1 = COPY renamable $vgpr7
+  ; RegAlloc-NEXT:   renamable $sgpr26 = V_READFIRSTLANE_B32 $vgpr12, implicit $exec
+  ; RegAlloc-NEXT:   renamable $sgpr27 = V_READFIRSTLANE_B32 $vgpr13, implicit $exec
+  ; RegAlloc-NEXT:   renamable $sgpr28 = V_READFIRSTLANE_B32 $vgpr14, implicit $exec
+  ; RegAlloc-NEXT:   S_CMP_EQ_U64 killed renamable $sgpr14_sgpr15, killed renamable $sgpr18_sgpr19, implicit-def $scc
+  ; RegAlloc-NEXT:   renamable $sgpr29 = V_READFIRSTLANE_B32 killed $vgpr15, implicit $exec
+  ; RegAlloc-NEXT:   renamable $sgpr14 = S_CSELECT_B32 1, 0, implicit $scc
+  ; RegAlloc-NEXT:   S_CMP_EQ_U64 killed renamable $sgpr16_sgpr17, killed renamable $sgpr20_sgpr21, implicit-def $scc
+  ; RegAlloc-NEXT:   renamable $sgpr25 = V_READFIRSTLANE_B32 $vgpr1, implicit $exec
+  ; RegAlloc-NEXT:   renamable $sgpr15 = S_CSELECT_B32 1, 0, implicit $scc
+  ; RegAlloc-NEXT:   S_CMP_EQ_U64 killed renamable $sgpr26_sgpr27, killed renamable $sgpr22_sgpr23, implicit-def $scc
+  ; RegAlloc-NEXT:   renamable $sgpr16 = S_CSELECT_B32 1, 0, implicit $scc
+  ; RegAlloc-NEXT:   S_CMP_EQ_U64 killed renamable $sgpr28_sgpr29, killed renamable $sgpr24_sgpr25, implicit-def $scc
+  ; RegAlloc-NEXT:   renamable $sgpr17 = S_CSELECT_B32 1, 0, implicit $scc
+  ; RegAlloc-NEXT:   renamable $sgpr14 = S_AND_B32 killed renamable $sgpr14, killed renamable $sgpr15, implicit-def dead $scc
+  ; RegAlloc-NEXT:   renamable $sgpr14 = S_AND_B32 killed renamable $sgpr16, killed renamable $sgpr14, implicit-def dead $scc
+  ; RegAlloc-NEXT:   renamable $sgpr14 = S_AND_B32 killed renamable $sgpr17, killed renamable $sgpr14, implicit-def dead $scc
+  ; RegAlloc-NEXT:   renamable $sgpr16_sgpr17 = V_CMP_NE_U32_e64 $sgpr7, killed $vgpr16, implicit $exec
+  ; RegAlloc-NEXT:   renamable $sgpr14 = S_AND_B32 killed renamable $sgpr14, 1, implicit-def dead $scc
+  ; RegAlloc-NEXT:   $scc = COPY killed renamable $sgpr14
+  ; RegAlloc-NEXT:   renamable $sgpr14 = S_CSELECT_B32 1, 0, implicit $scc
+  ; RegAlloc-NEXT:   renamable $sgpr12_sgpr13 = S_XOR_B64 killed renamable $sgpr16_sgpr17, killed renamable $sgpr12_sgpr13, implicit-def dead $scc
+  ; RegAlloc-NEXT:   renamable $vgpr3 = IMPLICIT_DEF
+  ; RegAlloc-NEXT:   renamable $sgpr16_sgpr17 = COPY $exec, implicit-def $exec
+  ; RegAlloc-NEXT:   renamable $sgpr18_sgpr19 = S_AND_B64 renamable $sgpr16_sgpr17, killed renamable $sgpr12_sgpr13, implicit-def dead $scc
+  ; RegAlloc-NEXT:   renamable $sgpr12_sgpr13 = S_XOR_B64 renamable $sgpr18_sgpr19, killed renamable $sgpr16_sgpr17, implicit-def dead $scc
+  ; RegAlloc-NEXT:   $exec = S_MOV_B64_term killed renamable $sgpr18_sgpr19
+  ; RegAlloc-NEXT:   S_CBRANCH_EXECZ %bb.1, implicit $exec
+  ; RegAlloc-NEXT:   S_BRANCH %bb.2
+  ; RegAlloc-NEXT: {{  $}}
+  ; RegAlloc-NEXT: bb.1:
+  ; RegAlloc-NEXT:   successors: %bb.3(0x40000000), %bb.5(0x40000000)
+  ; RegAlloc-NEXT:   liveins: $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr14, $vgpr0, $vgpr3, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr1_vgpr2:0x0000000000000003, $vgpr8_vgpr9_vgpr10_vgpr11:0x00000000000000FF, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7:0x0000000000003F00
+  ; RegAlloc-NEXT: {{  $}}
+  ; RegAlloc-NEXT:   renamable $sgpr12_sgpr13 = S_OR_SAVEEXEC_B64 killed renamable $sgpr12_sgpr13, implicit-def $exec, implicit-def $scc, implicit $exec
+  ; RegAlloc-NEXT:   $exec = S_XOR_B64_term $exec, renamable $sgpr12_sgpr13, implicit-def $scc
+  ; RegAlloc-NEXT:   S_CBRANCH_EXECZ %bb.5, implicit $exec
+  ; RegAlloc-NEXT:   S_BRANCH %bb.3
+  ; RegAlloc-NEXT: {{  $}}
+  ; RegAlloc-NEXT: bb.2:
+  ; RegAlloc-NEXT:   successors: %bb.1(0x80000000)
+  ; RegAlloc-NEXT:   liveins: $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr14, $vgpr0, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr0_sgpr1_sgpr2_sgpr3
+  ; RegAlloc-NEXT: {{  $}}
+  ; RegAlloc-NEXT:   renamable $sgpr15 = S_OR_B32 renamable $sgpr14, 2, implicit-def dead $scc
+  ; RegAlloc-NEXT:   renamable $vgpr3 = COPY killed renamable $sgpr15
+  ; RegAlloc-NEXT:   renamable $vgpr8_vgpr9 = IMPLICIT_DEF
+  ; RegAlloc-NEXT:   renamable $vgpr1_vgpr2 = IMPLICIT_DEF
+  ; RegAlloc-NEXT:   renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF
+  ; RegAlloc-NEXT:   S_BRANCH %bb.1
+  ; RegAlloc-NEXT: {{  $}}
+  ; RegAlloc-NEXT: bb.3:
+  ; RegAlloc-NEXT:   successors: %bb.5(0x80000000)
+  ; RegAlloc-NEXT:   liveins: $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr14, $vgpr0, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr1_vgpr2:0x0000000000000003, $vgpr8_vgpr9_vgpr10_vgpr11:0x00000000000000FF, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7:0x0000000000003F00
+  ; RegAlloc-NEXT: {{  $}}
+  ; RegAlloc-NEXT:   renamable $sgpr7 = S_LSHL_B32 killed renamable $sgpr7, 5, implicit-def dead $scc
+  ; RegAlloc-NEXT:   renamable $vgpr2 = COPY renamable $sgpr7
+  ; RegAlloc-NEXT:   renamable $sgpr7 = S_OR_B32 killed renamable $sgpr7, 16, implicit-def dead $scc
+  ; RegAlloc-NEXT:   renamable $sgpr16 = V_READFIRSTLANE_B32 $vgpr8, implicit $exec
+  ; RegAlloc-NEXT:   renamable $vgpr3 = COPY killed renamable $sgpr7
+  ; RegAlloc-NEXT:   renamable $vgpr12_vgpr13_vgpr14_vgpr15 = BUFFER_LOAD_DWORDX4_OFFEN killed renamable $vgpr2, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
+  ; RegAlloc-NEXT:   renamable $sgpr17 = V_READFIRSTLANE_B32 $vgpr9, implicit $exec
+  ; RegAlloc-NEXT:   renamable $sgpr18 = V_READFIRSTLANE_B32 $vgpr10, implicit $exec
+  ; RegAlloc-NEXT:   renamable $sgpr19 = V_READFIRSTLANE_B32 killed $vgpr11, implicit $exec
+  ; RegAlloc-NEXT:   renamable $vgpr7_vgpr8_vgpr9_vgpr10 = BUFFER_LOAD_DWORDX4_OFFEN killed renamable $vgpr3, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
+  ; RegAlloc-NEXT:   renamable $sgpr0 = V_READFIRSTLANE_B32 $vgpr4, implicit $exec
+  ; RegAlloc-NEXT:   renamable $sgpr1 = V_READFIRSTLANE_B32 $vgpr5, implicit $exec
+  ; RegAlloc-NEXT:   renamable $sgpr2 = V_READFIRSTLANE_B32 killed $vgpr6, implicit $exec
+  ; RegAlloc-NEXT:   renamable $sgpr3 = V_READFIRSTLANE_B32 killed $vgpr1, implicit $exec
+  ; RegAlloc-NEXT:   renamable $sgpr20 = V_READFIRSTLANE_B32 $vgpr12, implicit $exec
+  ; RegAlloc-NEXT:   renamable $sgpr21 = V_READFIRSTLANE_B32 $vgpr13, implicit $exec
+  ; RegAlloc-NEXT:   renamable $sgpr22 = V_READFIRSTLANE_B32 $vgpr14, implicit $exec
+  ; RegAlloc-NEXT:   renamable $sgpr23 = V_READFIRSTLANE_B32 killed $vgpr15, implicit $exec
+  ; RegAlloc-NEXT:   renamable $sgpr24 = V_READFIRSTLANE_B32 $vgpr7, implicit $exec
+  ; RegAlloc-NEXT:   renamable $sgpr25 = V_READFIRSTLANE_B32 $vgpr8, implicit $exec
+  ; RegAlloc-NEXT:   S_CMP_EQ_U64 killed renamable $sgpr20_sgpr21, killed renamable $sgpr16_sgpr17, implicit-def $scc
+  ; RegAlloc-NEXT:   renamable $sgpr16 = V_READFIRSTLANE_B32 $vgpr9, implicit $exec
+  ; RegAlloc-NEXT:   renamable $sgpr7 = S_CSELECT_B32 1, 0, implicit $scc
+  ; RegAlloc-NEXT:   S_CMP_EQ_U64 killed renamable $sgpr22_sgpr23, killed renamable $sgpr18_sgpr19, implicit-def $scc
+  ; RegAlloc-NEXT:   renamable $sgpr17 = V_READFIRSTLANE_B32 killed $vgpr10, implicit $exec
+  ; RegAlloc-NEXT:   renamable $sgpr15 = S_CSELECT_B32 1, 0, implicit $scc
+  ; RegAlloc-NEXT:   S_CMP_EQ_U64 killed renamable $sgpr24_sgpr25, killed renamable $sgpr0_sgpr1, implicit-def $scc
+  ; RegAlloc-NEXT:   renamable $sgpr0 = S_CSELECT_B32 1, 0, implicit $scc
+  ; RegAlloc-NEXT:   S_CMP_EQ_U64 killed renamable $sgpr16_sgpr17, killed renamable $sgpr2_sgpr3, implicit-def $scc
+  ; RegAlloc-NEXT:   renamable $sgpr1 = S_CSELECT_B32 1, 0, implicit $scc
+  ; RegAlloc-NEXT:   renamable $sgpr2 = S_AND_B32 killed renamable $sgpr7, killed renamable $sgpr15, implicit-def dead $scc
+  ; RegAlloc-NEXT:   renamable $sgpr0 = S_AND_B32 killed renamable $sgpr0, killed renamable $sgpr2, implicit-def dead $scc
+  ; RegAlloc-NEXT:   renamable $sgpr0 = S_AND_B32 killed renamable $sgpr1, killed renamable $sgpr0, implicit-def dead $scc
+  ; RegAlloc-NEXT:   renamable $sgpr0 = S_AND_B32 killed renamable $sgpr0, 1, implicit-def dead $scc
+  ; RegAlloc-NEXT:   $scc = COPY killed renamable $sgpr0
+  ; RegAlloc-NEXT:   renamable $sgpr0 = S_CSELECT_B32 1, 0, implicit $scc
+  ; RegAlloc-NEXT:   renamable $sgpr0 = S_LSHL_B32 killed renamable $sgpr0, 1, implicit-def dead $scc
+  ; RegAlloc-NEXT:   renamable $sgpr0 = S_OR_B32 killed renamable $sgpr0, killed renamable $sgpr14, implicit-def dead $scc
+  ; RegAlloc-NEXT:   renamable $vgpr3 = COPY killed renamable $sgpr0
+  ; RegAlloc-NEXT:   S_BRANCH %bb.5
+  ; RegAlloc-NEXT: {{  $}}
+  ; RegAlloc-NEXT: bb.4:
+  ; RegAlloc-NEXT:   liveins: $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr3, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11
+  ; RegAlloc-NEXT: {{  $}}
+  ; RegAlloc-NEXT:   renamable $sgpr8_sgpr9 = S_LOAD_DWORDX2_IMM killed renamable $sgpr8_sgpr9, 0, 0 :: (invariant load (<2 x s32>), align 16, addrspace 4)
+  ; RegAlloc-NEXT:   renamable $vgpr1 = COPY killed renamable $sgpr5
+  ; RegAlloc-NEXT:   renamable $vgpr1_vgpr2, dead renamable $sgpr0_sgpr1 = V_MAD_U64_U32_e64 $sgpr9, killed $sgpr6, $vgpr1_vgpr2, 0, implicit $exec
+  ; RegAlloc-NEXT:   renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr10_sgpr11, 0, 0 :: (invariant load (<4 x s32>), addrspace 4)
+  ; RegAlloc-NEXT:   renamable $sgpr5 = S_LSHL_B32 killed renamable $sgpr8, 6, implicit-def dead $scc
+  ; RegAlloc-NEXT:   renamable $vgpr1 = V_MUL_LO_U32_e64 killed $vgpr1, killed $sgpr5, implicit $exec
+  ; RegAlloc-NEXT:   renamable $sgpr4 = S_LSHL_B32 killed renamable $sgpr4, 6, implicit-def dead $scc
+  ; RegAlloc-NEXT:   renamable $vgpr0 = V_ADD_U32_e64 killed $sgpr4, killed $vgpr0, 0, implicit $exec
+  ; RegAlloc-NEXT:   renamable $vgpr0 = V_ADD_LSHL_U32_e64 killed $vgpr1, killed $vgpr0, 2, implicit $exec
+  ; RegAlloc-NEXT:   BUFFER_STORE_DWORD_OFFEN_exact killed renamable $vgpr3, killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
+  ; RegAlloc-NEXT:   S_ENDPGM 0
+  ; RegAlloc-NEXT: {{  $}}
+  ; RegAlloc-NEXT: bb.5:
+  ; RegAlloc-NEXT:   successors: %bb.4(0x80000000)
+  ; RegAlloc-NEXT:   liveins: $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr3, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13
+  ; RegAlloc-NEXT: {{  $}}
+  ; RegAlloc-NEXT:   $exec = S_OR_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def $scc
+  ; RegAlloc-NEXT:   S_BRANCH %bb.4
+  ;
+  ; DeadInstDelete-LABEL: name: _amdgpu_cs_main
+  ; DeadInstDelete: bb.0:
+  ; DeadInstDelete-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; DeadInstDelete-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $vgpr0, $vgpr1, $vgpr2
+  ; DeadInstDelete-NEXT: {{  $}}
+  ; DeadInstDelete-NEXT:   renamable $sgpr0 = COPY $sgpr3
+  ; DeadInstDelete-NEXT:   renamable $sgpr12_sgpr13 = COPY $exec
+  ; DeadInstDelete-NEXT:   renamable $sgpr8 = COPY $sgpr1
+  ; DeadInstDelete-NEXT:   renamable $sgpr9 = COPY $sgpr2
+  ; DeadInstDelete-NEXT:   renamable $sgpr2_sgpr3 = S_GETPC_B64_pseudo
+  ; DeadInstDelete-NEXT:   renamable $sgpr10_sgpr11 = S_MOV_B64_IMM_PSEUDO -4294967296
+  ; DeadInstDelete-NEXT:   renamable $sgpr2_sgpr3 = S_AND_B64 killed renamable $sgpr2_sgpr3, renamable $sgpr10_sgpr11, implicit-def dead $scc
+  ; DeadInstDelete-NEXT:   renamable $sgpr1 = COPY killed renamable $sgpr10
+  ; DeadInstDelete-NEXT:   renamable $sgpr10_sgpr11 = S_OR_B64 killed renamable $sgpr2_sgpr3, killed renamable $sgpr0_sgpr1, implicit-def dead $scc
+  ; DeadInstDelete-NEXT:   renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM renamable $sgpr10_sgpr11, 16, 0 :: (invariant load (<4 x s32>), addrspace 4)
+  ; DeadInstDelete-NEXT:   renamable $sgpr7 = IMPLICIT_DEF
+  ; DeadInstDelete-NEXT:   renamable $sgpr14 = S_LSHL_B32 renamable $sgpr7, 5, implicit-def dead $scc
+  ; DeadInstDelete-NEXT:   renamable $vgpr1 = COPY renamable $sgpr14
+  ; DeadInstDelete-NEXT:   renamable $vgpr1_vgpr2_vgpr3_vgpr4 = BUFFER_LOAD_DWORDX4_OFFEN killed renamable $vgpr1, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
+  ; DeadInstDelete-NEXT:   renamable $vgpr5 = V_MBCNT_LO_U32_B32_e64 -1, 0, implicit $exec
+  ; DeadInstDelete-NEXT:   renamable $vgpr16 = V_MBCNT_HI_U32_B32_e64 -1, killed $vgpr5, implicit $exec
+  ; DeadInstDelete-NEXT:   renamable $sgpr14 = S_OR_B32 killed renamable $sgpr14, 16, implicit-def dead $scc
+  ; DeadInstDelete-NEXT:   renamable $vgpr5 = V_LSHLREV_B32_e64 5, $vgpr16, implicit $exec
+  ; DeadInstDelete-NEXT:   renamable $vgpr6 = COPY killed renamable $sgpr14
+  ; DeadInstDelete-NEXT:   renamable $vgpr7 = V_OR_B32_e64 16, $vgpr5, implicit $exec
+  ; DeadInstDelete-NEXT:   renamable $vgpr12_vgpr13_vgpr14_vgpr15 = BUFFER_LOAD_DWORDX4_OFFEN killed renamable $vgpr6, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
+  ; DeadInstDelete-NEXT:   renamable $sgpr14 = V_READFIRSTLANE_B32 $vgpr1, implicit $exec
+  ; DeadInstDelete-NEXT:   renamable $vgpr8_vgpr9_vgpr10_vgpr11 = BUFFER_LOAD_DWORDX4_OFFEN killed renamable $vgpr5, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
+  ; DeadInstDelete-NEXT:   renamable $sgpr15 = V_READFIRSTLANE_B32 $vgpr2, implicit $exec
+  ; DeadInstDelete-NEXT:   renamable $sgpr16 = V_READFIRSTLANE_B32 $vgpr3, implicit $exec
+  ; DeadInstDelete-NEXT:   renamable $sgpr17 = V_READFIRSTLANE_B32 killed $vgpr4, implicit $exec
+  ; DeadInstDelete-NEXT:   renamable $vgpr4_vgpr5_vgpr6_vgpr7 = BUFFER_LOAD_DWORDX4_OFFEN killed renamable $vgpr7, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
+  ; DeadInstDelete-NEXT:   renamable $sgpr18 = V_READFIRSTLANE_B32 $vgpr8, implicit $exec
+  ; DeadInstDelete-NEXT:   renamable $sgpr19 = V_READFIRSTLANE_B32 $vgpr9, implicit $exec
+  ; DeadInstDelete-NEXT:   renamable $sgpr20 = V_READFIRSTLANE_B32 $vgpr10, implicit $exec
+  ; DeadInstDelete-NEXT:   renamable $sgpr21 = V_READFIRSTLANE_B32 $vgpr11, implicit $exec
+  ; DeadInstDelete-NEXT:   renamable $sgpr22 = V_READFIRSTLANE_B32 $vgpr4, implicit $exec
+  ; DeadInstDelete-NEXT:   renamable $sgpr23 = V_READFIRSTLANE_B32 $vgpr5, implicit $exec
+  ; DeadInstDelete-NEXT:   renamable $sgpr24 = V_READFIRSTLANE_B32 $vgpr6, implicit $exec
+  ; DeadInstDelete-NEXT:   renamable $vgpr1 = COPY renamable $vgpr7
+  ; DeadInstDelete-NEXT:   renamable $sgpr26 = V_READFIRSTLANE_B32 $vgpr12, implicit $exec
+  ; DeadInstDelete-NEXT:   renamable $sgpr27 = V_READFIRSTLANE_B32 $vgpr13, implicit $exec
+  ; DeadInstDelete-NEXT:   renamable $sgpr28 = V_READFIRSTLANE_B32 $vgpr14, implicit $exec
+  ; DeadInstDelete-NEXT:   S_CMP_EQ_U64 killed renamable $sgpr14_sgpr15, killed renamable $sgpr18_sgpr19, implicit-def $scc
+  ; DeadInstDelete-NEXT:   renamable $sgpr29 = V_READFIRSTLANE_B32 killed $vgpr15, implicit $exec
+  ; DeadInstDelete-NEXT:   renamable $sgpr14 = S_CSELECT_B32 1, 0, implicit $scc
+  ; DeadInstDelete-NEXT:   S_CMP_EQ_U64 killed renamable $sgpr16_sgpr17, killed renamable $sgpr20_sgpr21, implicit-def $scc
+  ; DeadInstDelete-NEXT:   renamable $sgpr25 = V_READFIRSTLANE_B32 $vgpr1, implicit $exec
+  ; DeadInstDelete-NEXT:   renamable $sgpr15 = S_CSELECT_B32 1, 0, implicit $scc
+  ; DeadInstDelete-NEXT:   S_CMP_EQ_U64 killed renamable $sgpr26_sgpr27, killed renamable $sgpr22_sgpr23, implicit-def $scc
+  ; DeadInstDelete-NEXT:   renamable $sgpr16 = S_CSELECT_B32 1, 0, implicit $scc
+  ; DeadInstDelete-NEXT:   S_CMP_EQ_U64 killed renamable $sgpr28_sgpr29, killed renamable $sgpr24_sgpr25, implicit-def $scc
+  ; DeadInstDelete-NEXT:   renamable $sgpr17 = S_CSELECT_B32 1, 0, implicit $scc
+  ; DeadInstDelete-NEXT:   renamable $sgpr14 = S_AND_B32 killed renamable $sgpr14, killed renamable $sgpr15, implicit-def dead $scc
+  ; DeadInstDelete-NEXT:   renamable $sgpr14 = S_AND_B32 killed renamable $sgpr16, killed renamable $sgpr14, implicit-def dead $scc
+  ; DeadInstDelete-NEXT:   renamable $sgpr14 = S_AND_B32 killed renamable $sgpr17, killed renamable $sgpr14, implicit-def dead $scc
+  ; DeadInstDelete-NEXT:   renamable $sgpr16_sgpr17 = V_CMP_NE_U32_e64 $sgpr7, killed $vgpr16, implicit $exec
+  ; DeadInstDelete-NEXT:   renamable $sgpr14 = S_AND_B32 killed renamable $sgpr14, 1, implicit-def dead $scc
+  ; DeadInstDelete-NEXT:   $scc = COPY killed renamable $sgpr14
+  ; DeadInstDelete-NEXT:   renamable $sgpr14 = S_CSELECT_B32 1, 0, implicit $scc
+  ; DeadInstDelete-NEXT:   renamable $sgpr12_sgpr13 = S_XOR_B64 killed renamable $sgpr16_sgpr17, killed renamable $sgpr12_sgpr13, implicit-def dead $scc
+  ; DeadInstDelete-NEXT:   renamable $vgpr3 = IMPLICIT_DEF
+  ; DeadInstDelete-NEXT:   renamable $sgpr16_sgpr17 = COPY $exec, implicit-def $exec
+  ; DeadInstDelete-NEXT:   renamable $sgpr18_sgpr19 = S_AND_B64 renamable $sgpr16_sgpr17, killed renamable $sgpr12_sgpr13, implicit-def dead $scc
+  ; DeadInstDelete-NEXT:   renamable $sgpr12_sgpr13 = S_XOR_B64 renamable $sgpr18_sgpr19, killed renamable $sgpr16_sgpr17, implicit-def dead $scc
+  ; DeadInstDelete-NEXT:   $exec = S_MOV_B64_term killed renamable $sgpr18_sgpr19
+  ; DeadInstDelete-NEXT:   S_CBRANCH_EXECZ %bb.1, implicit $exec
+  ; DeadInstDelete-NEXT:   S_BRANCH %bb.2
+  ; DeadInstDelete-NEXT: {{  $}}
+  ; DeadInstDelete-NEXT: bb.1:
+  ; DeadInstDelete-NEXT:   successors: %bb.3(0x40000000), %bb.5(0x40000000)
+  ; DeadInstDelete-NEXT:   liveins: $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr14, $vgpr0, $vgpr3, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr1_vgpr2:0x0000000000000003, $vgpr8_vgpr9_vgpr10_vgpr11:0x00000000000000FF, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7:0x0000000000003F00
+  ; DeadInstDelete-NEXT: {{  $}}
+  ; DeadInstDelete-NEXT:   renamable $sgpr12_sgpr13 = S_OR_SAVEEXEC_B64 killed renamable $sgpr12_sgpr13, implicit-def $exec, implicit-def $scc, implicit $exec
+  ; DeadInstDelete-NEXT:   $exec = S_XOR_B64_term $exec, renamable $sgpr12_sgpr13, implicit-def $scc
+  ; DeadInstDelete-NEXT:   S_CBRANCH_EXECZ %bb.5, implicit $exec
+  ; DeadInstDelete-NEXT:   S_BRANCH %bb.3
+  ; DeadInstDelete-NEXT: {{  $}}
+  ; DeadInstDelete-NEXT: bb.2:
+  ; DeadInstDelete-NEXT:   successors: %bb.1(0x80000000)
+  ; DeadInstDelete-NEXT:   liveins: $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr14, $vgpr0, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr0_sgpr1_sgpr2_sgpr3
+  ; DeadInstDelete-NEXT: {{  $}}
+  ; DeadInstDelete-NEXT:   renamable $sgpr15 = S_OR_B32 renamable $sgpr14, 2, implicit-def dead $scc
+  ; DeadInstDelete-NEXT:   renamable $vgpr8_vgpr9 = IMPLICIT_DEF
+  ; DeadInstDelete-NEXT:   renamable $vgpr1_vgpr2 = IMPLICIT_DEF
+  ; DeadInstDelete-NEXT:   renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF
+  ; DeadInstDelete-NEXT:   S_BRANCH %bb.1
+  ; DeadInstDelete-NEXT: {{  $}}
+  ; DeadInstDelete-NEXT: bb.3:
+  ; DeadInstDelete-NEXT:   successors: %bb.5(0x80000000)
+  ; DeadInstDelete-NEXT:   liveins: $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr14, $vgpr0, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr1_vgpr2:0x0000000000000003, $vgpr8_vgpr9_vgpr10_vgpr11:0x00000000000000FF, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7:0x0000000000003F00
+  ; DeadInstDelete-NEXT: {{  $}}
+  ; DeadInstDelete-NEXT:   renamable $sgpr7 = S_LSHL_B32 killed renamable $sgpr7, 5, implicit-def dead $scc
+  ; DeadInstDelete-NEXT:   renamable $vgpr2 = COPY renamable $sgpr7
+  ; DeadInstDelete-NEXT:   renamable $sgpr7 = S_OR_B32 killed renamable $sgpr7, 16, implicit-def dead $scc
+  ; DeadInstDelete-NEXT:   renamable $sgpr16 = V_READFIRSTLANE_B32 $vgpr8, implicit $exec
+  ; DeadInstDelete-NEXT:   renamable $vgpr3 = COPY killed renamable $sgpr7
+  ; DeadInstDelete-NEXT:   renamable $vgpr12_vgpr13_vgpr14_vgpr15 = BUFFER_LOAD_DWORDX4_OFFEN killed renamable $vgpr2, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
+  ; DeadInstDelete-NEXT:   renamable $sgpr17 = V_READFIRSTLANE_B32 $vgpr9, implicit $exec
+  ; DeadInstDelete-NEXT:   renamable $sgpr18 = V_READFIRSTLANE_B32 $vgpr10, implicit $exec
+  ; DeadInstDelete-NEXT:   renamable $sgpr19 = V_READFIRSTLANE_B32 killed $vgpr11, implicit $exec
+  ; DeadInstDelete-NEXT:   renamable $vgpr7_vgpr8_vgpr9_vgpr10 = BUFFER_LOAD_DWORDX4_OFFEN killed renamable $vgpr3, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
+  ; DeadInstDelete-NEXT:   renamable $sgpr0 = V_READFIRSTLANE_B32 $vgpr4, implicit $exec
+  ; DeadInstDelete-NEXT:   renamable $sgpr1 = V_READFIRSTLANE_B32 $vgpr5, implicit $exec
+  ; DeadInstDelete-NEXT:   renamable $sgpr2 = V_READFIRSTLANE_B32 killed $vgpr6, implicit $exec
+  ; DeadInstDelete-NEXT:   renamable $sgpr3 = V_READFIRSTLANE_B32 killed $vgpr1, implicit $exec
+  ; DeadInstDelete-NEXT:   renamable $sgpr20 = V_READFIRSTLANE_B32 $vgpr12, implicit $exec
+  ; DeadInstDelete-NEXT:   renamable $sgpr21 = V_READFIRSTLANE_B32 $vgpr13, implicit $exec
+  ; DeadInstDelete-NEXT:   renamable $sgpr22 = V_READFIRSTLANE_B32 $vgpr14, implicit $exec
+  ; DeadInstDelete-NEXT:   renamable $sgpr23 = V_READFIRSTLANE_B32 killed $vgpr15, implicit $exec
+  ; DeadInstDelete-NEXT:   renamable $sgpr24 = V_READFIRSTLANE_B32 $vgpr7, implicit $exec
+  ; DeadInstDelete-NEXT:   renamable $sgpr25 = V_READFIRSTLANE_B32 $vgpr8, implicit $exec
+  ; DeadInstDelete-NEXT:   S_CMP_EQ_U64 killed renamable $sgpr20_sgpr21, killed renamable $sgpr16_sgpr17, implicit-def $scc
+  ; DeadInstDelete-NEXT:   renamable $sgpr16 = V_READFIRSTLANE_B32 $vgpr9, implicit $exec
+  ; DeadInstDelete-NEXT:   renamable $sgpr7 = S_CSELECT_B32 1, 0, implicit $scc
+  ; DeadInstDelete-NEXT:   S_CMP_EQ_U64 killed renamable $sgpr22_sgpr23, killed renamable $sgpr18_sgpr19, implicit-def $scc
+  ; DeadInstDelete-NEXT:   renamable $sgpr17 = V_READFIRSTLANE_B32 killed $vgpr10, implicit $exec
+  ; DeadInstDelete-NEXT:   renamable $sgpr15 = S_CSELECT_B32 1, 0, implicit $scc
+  ; DeadInstDelete-NEXT:   S_CMP_EQ_U64 killed renamable $sgpr24_sgpr25, killed renamable $sgpr0_sgpr1, implicit-def $scc
+  ; DeadInstDelete-NEXT:   renamable $sgpr0 = S_CSELECT_B32 1, 0, implicit $scc
+  ; DeadInstDelete-NEXT:   S_CMP_EQ_U64 killed renamable $sgpr16_sgpr17, killed renamable $sgpr2_sgpr3, implicit-def $scc
+  ; DeadInstDelete-NEXT:   renamable $sgpr1 = S_CSELECT_B32 1, 0, implicit $scc
+  ; DeadInstDelete-NEXT:   renamable $sgpr2 = S_AND_B32 killed renamable $sgpr7, killed renamable $sgpr15, implicit-def dead $scc
+  ; DeadInstDelete-NEXT:   renamable $sgpr0 = S_AND_B32 killed renamable $sgpr0, killed renamable $sgpr2, implicit-def dead $scc
+  ; DeadInstDelete-NEXT:   renamable $sgpr0 = S_AND_B32 killed renamable $sgpr1, killed renamable $sgpr0, implicit-def dead $scc
+  ; DeadInstDelete-NEXT:   renamable $sgpr0 = S_AND_B32 killed renamable $sgpr0, 1, implicit-def dead $scc
+  ; DeadInstDelete-NEXT:   $scc = COPY killed renamable $sgpr0
+  ; DeadInstDelete-NEXT:   renamable $sgpr0 = S_CSELECT_B32 1, 0, implicit $scc
+  ; DeadInstDelete-NEXT:   renamable $sgpr0 = S_LSHL_B32 killed renamable $sgpr0, 1, implicit-def dead $scc
+  ; DeadInstDelete-NEXT:   renamable $sgpr0 = S_OR_B32 killed renamable $sgpr0, killed renamable $sgpr14, implicit-def dead $scc
+  ; DeadInstDelete-NEXT:   renamable $vgpr3 = COPY killed renamable $sgpr0
+  ; DeadInstDelete-NEXT:   S_BRANCH %bb.5
+  ; DeadInstDelete-NEXT: {{  $}}
+  ; DeadInstDelete-NEXT: bb.4:
+  ; DeadInstDelete-NEXT:   liveins: $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr3, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11
+  ; DeadInstDelete-NEXT: {{  $}}
+  ; DeadInstDelete-NEXT:   renamable $sgpr8_sgpr9 = S_LOAD_DWORDX2_IMM killed renamable $sgpr8_sgpr9, 0, 0 :: (invariant load (<2 x s32>), align 16, addrspace 4)
+  ; DeadInstDelete-NEXT:   renamable $vgpr1 = COPY killed renamable $sgpr5
+  ; DeadInstDelete-NEXT:   renamable $vgpr1_vgpr2, dead renamable $sgpr0_sgpr1 = V_MAD_U64_U32_e64 $sgpr9, killed $sgpr6, $vgpr1_vgpr2, 0, implicit $exec
+  ; DeadInstDelete-NEXT:   renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr10_sgpr11, 0, 0 :: (invariant load (<4 x s32>), addrspace 4)
+  ; DeadInstDelete-NEXT:   renamable $sgpr5 = S_LSHL_B32 killed renamable $sgpr8, 6, implicit-def dead $scc
+  ; DeadInstDelete-NEXT:   renamable $vgpr1 = V_MUL_LO_U32_e64 killed $vgpr1, killed $sgpr5, implicit $exec
+  ; DeadInstDelete-NEXT:   renamable $sgpr4 = S_LSHL_B32 killed renamable $sgpr4, 6, implicit-def dead $scc
+  ; DeadInstDelete-NEXT:   renamable $vgpr0 = V_ADD_U32_e64 killed $sgpr4, killed $vgpr0, 0, implicit $exec
+  ; DeadInstDelete-NEXT:   renamable $vgpr0 = V_ADD_LSHL_U32_e64 killed $vgpr1, killed $vgpr0, 2, implicit $exec
+  ; DeadInstDelete-NEXT:   BUFFER_STORE_DWORD_OFFEN_exact killed renamable $vgpr3, killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
+  ; DeadInstDelete-NEXT:   S_ENDPGM 0
+  ; DeadInstDelete-NEXT: {{  $}}
+  ; DeadInstDelete-NEXT: bb.5:
+  ; DeadInstDelete-NEXT:   successors: %bb.4(0x80000000)
+  ; DeadInstDelete-NEXT:   liveins: $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr3, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13
+  ; DeadInstDelete-NEXT: {{  $}}
+  ; DeadInstDelete-NEXT:   $exec = S_OR_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def $scc
+  ; DeadInstDelete-NEXT:   S_BRANCH %bb.4
+  bb.39:
+    successors: %bb.82(0x40000000), %bb.40(0x40000000)
+    liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $vgpr0, $vgpr1, $vgpr2
+
+    %11:vgpr_32 = COPY $vgpr0
+    undef %1.sub0:sreg_64 = COPY $sgpr1
+    %1.sub1:sreg_64 = COPY $sgpr2
+    undef %18.sub0:sreg_64 = COPY $sgpr3
+    %8:sreg_32 = COPY $sgpr4
+    %9:sreg_32 = COPY $sgpr5
+    %10:sreg_32 = COPY $sgpr6
+    %15:sreg_64 = S_GETPC_B64_pseudo
+    undef %16.sub0:sreg_64 = S_MOV_B32 0
+    %16.sub1:sreg_64 = S_MOV_B32 -1
+    %17:sreg_64 = S_AND_B64 %15, %16, implicit-def dead $scc
+    %18.sub1:sreg_64 = COPY %16.sub0
+    %19:sreg_64 = S_OR_B64 %17, %18, implicit-def dead $scc
+    %23:sgpr_128 = S_LOAD_DWORDX4_IMM %19, 16, 0 :: (invariant load (<4 x s32>), addrspace 4)
+    %1865:sreg_32 = IMPLICIT_DEF
+    %218:vgpr_32 = V_MBCNT_LO_U32_B32_e64 -1, 0, implicit $exec
+    %221:vgpr_32 = V_MBCNT_HI_U32_B32_e64 -1, %218, implicit $exec
+    %223:vgpr_32 = V_LSHLREV_B32_e64 5, %221, implicit $exec
+    undef %573.sub0_sub1_sub2_sub3:vreg_256 = BUFFER_LOAD_DWORDX4_OFFEN %223, %23, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
+    %226:vgpr_32 = V_OR_B32_e64 16, %223, implicit $exec
+    %573.sub4_sub5_sub6_sub7:vreg_256 = BUFFER_LOAD_DWORDX4_OFFEN %226, %23, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
+    undef %245.sub0:sreg_64 = V_READFIRSTLANE_B32 %573.sub0, implicit $exec
+    %245.sub1:sreg_64 = V_READFIRSTLANE_B32 %573.sub1, implicit $exec
+    undef %253.sub0:sreg_64 = V_READFIRSTLANE_B32 %573.sub2, implicit $exec
+    %253.sub1:sreg_64 = V_READFIRSTLANE_B32 %573.sub3, implicit $exec
+    undef %261.sub0:sreg_64 = V_READFIRSTLANE_B32 %573.sub4, implicit $exec
+    %261.sub1:sreg_64 = V_READFIRSTLANE_B32 %573.sub5, implicit $exec
+    undef %269.sub0:sreg_64 = V_READFIRSTLANE_B32 %573.sub6, implicit $exec
+    undef %1853.sub0:vreg_64 = COPY %573.sub7
+    %269.sub1:sreg_64 = V_READFIRSTLANE_B32 %1853.sub0, implicit $exec
+    %271:sreg_32 = S_LSHL_B32 %1865, 5, implicit-def dead $scc
+    %1328:vgpr_32 = COPY %271
+    %1329:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN %1328, %23, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
+    undef %574.sub0:sgpr_256 = V_READFIRSTLANE_B32 %1329.sub0, implicit $exec
+    %574.sub1:sgpr_256 = V_READFIRSTLANE_B32 %1329.sub1, implicit $exec
+    %574.sub2:sgpr_256 = V_READFIRSTLANE_B32 %1329.sub2, implicit $exec
+    %574.sub3:sgpr_256 = V_READFIRSTLANE_B32 %1329.sub3, implicit $exec
+    %273:sreg_32 = S_OR_B32 %271, 16, implicit-def dead $scc
+    %1339:vgpr_32 = COPY %273
+    %1340:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN %1339, %23, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
+    %574.sub4:sgpr_256 = V_READFIRSTLANE_B32 %1340.sub0, implicit $exec
+    %574.sub5:sgpr_256 = V_READFIRSTLANE_B32 %1340.sub1, implicit $exec
+    %574.sub6:sgpr_256 = V_READFIRSTLANE_B32 %1340.sub2, implicit $exec
+    %574.sub7:sgpr_256 = V_READFIRSTLANE_B32 %1340.sub3, implicit $exec
+    S_CMP_EQ_U64 %574.sub0_sub1, %245, implicit-def $scc
+    %1349:sreg_32 = S_CSELECT_B32 1, 0, implicit killed $scc
+    S_CMP_EQ_U64 %574.sub2_sub3, %253, implicit-def $scc
+    %1350:sreg_32 = S_CSELECT_B32 1, 0, implicit killed $scc
+    S_CMP_EQ_U64 %574.sub4_sub5, %261, implicit-def $scc
+    %1351:sreg_32 = S_CSELECT_B32 1, 0, implicit killed $scc
+    S_CMP_EQ_U64 %574.sub6_sub7, %269, implicit-def $scc
+    %1352:sreg_32 = S_CSELECT_B32 1, 0, implicit killed $scc
+    %1355:sreg_32 = S_AND_B32 %1349, %1350, implicit-def dead $scc
+    %1358:sreg_32 = S_AND_B32 %1351, %1355, implicit-def dead $scc
+    %1361:sreg_32 = S_AND_B32 %1352, %1358, implicit-def dead $scc
+    %1366:sreg_32 = S_AND_B32 %1361, 1, implicit-def dead $scc
+    $scc = COPY %1366
+    %288:sreg_32 = S_CSELECT_B32 1, 0, implicit killed $scc
+    %289:sreg_64_xexec = V_CMP_NE_U32_e64 %1865, %221, implicit $exec
+    %1369:sreg_64_xexec = COPY $exec
+    %291:sreg_64_xexec = S_XOR_B64 %289, %1369, implicit-def dead $scc
+    %1968:vgpr_32 = IMPLICIT_DEF
+    %2081:sreg_64 = COPY $exec, implicit-def $exec
+    %2082:sreg_64 = S_AND_B64 %2081, %291, implicit-def dead $scc
+    %293:sreg_64_xexec = S_XOR_B64 %2082, %2081, implicit-def dead $scc
+    $exec = S_MOV_B64_term %2082
+    S_CBRANCH_EXECZ %bb.40, implicit $exec
+    S_BRANCH %bb.82
+
+  bb.40:
+    successors: %bb.81(0x40000000), %bb.42(0x40000000)
+
+    %2083:sreg_64 = S_OR_SAVEEXEC_B64 %293, implicit-def $exec, implicit-def $scc, implicit $exec
+    %297:sreg_64_xexec = S_AND_B64 $exec, %2083, implicit-def $scc
+    $exec = S_XOR_B64_term $exec, %297, implicit-def $scc
+    S_CBRANCH_EXECZ %bb.42, implicit $exec
+    S_BRANCH %bb.81
+
+  bb.82:
+    successors: %bb.40(0x80000000)
+
+    %294:sreg_32 = S_OR_B32 %288, 2, implicit-def dead $scc
+    %1968:vgpr_32 = COPY %294
+    undef %573.sub0_sub1:vreg_256 = IMPLICIT_DEF
+    %1853:vreg_64 = IMPLICIT_DEF
+    S_BRANCH %bb.40
+
+  bb.81:
+    successors: %bb.42(0x80000000)
+
+    undef %484.sub0:sreg_64 = V_READFIRSTLANE_B32 %573.sub0, implicit $exec
+    %484.sub1:sreg_64 = V_READFIRSTLANE_B32 %573.sub1, implicit $exec
+    undef %489.sub0:sreg_64 = V_READFIRSTLANE_B32 %573.sub2, implicit $exec
+    %489.sub1:sreg_64 = V_READFIRSTLANE_B32 %573.sub3, implicit $exec
+    undef %494.sub0:sreg_64 = V_READFIRSTLANE_B32 %573.sub4, implicit $exec
+    %494.sub1:sreg_64 = V_READFIRSTLANE_B32 %573.sub5, implicit $exec
+    undef %499.sub0:sreg_64 = V_READFIRSTLANE_B32 %573.sub6, implicit $exec
+    %499.sub1:sreg_64 = V_READFIRSTLANE_B32 %1853.sub0, implicit $exec
+    %501:sreg_32 = S_LSHL_B32 %1865, 5, implicit-def dead $scc
+    %1761:vgpr_32 = COPY %501
+    %1762:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN %1761, %23, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
+    undef %607.sub0:sgpr_256 = V_READFIRSTLANE_B32 %1762.sub0, implicit $exec
+    %607.sub1:sgpr_256 = V_READFIRSTLANE_B32 %1762.sub1, implicit $exec
+    %607.sub2:sgpr_256 = V_READFIRSTLANE_B32 %1762.sub2, implicit $exec
+    %607.sub3:sgpr_256 = V_READFIRSTLANE_B32 %1762.sub3, implicit $exec
+    %503:sreg_32 = S_OR_B32 %501, 16, implicit-def dead $scc
+    %1772:vgpr_32 = COPY %503
+    %1773:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN %1772, %23, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
+    %607.sub4:sgpr_256 = V_READFIRSTLANE_B32 %1773.sub0, implicit $exec
+    %607.sub5:sgpr_256 = V_READFIRSTLANE_B32 %1773.sub1, implicit $exec
+    %607.sub6:sgpr_256 = V_READFIRSTLANE_B32 %1773.sub2, implicit $exec
+    %607.sub7:sgpr_256 = V_READFIRSTLANE_B32 %1773.sub3, implicit $exec
+    S_CMP_EQ_U64 %607.sub0_sub1, %484, implicit-def $scc
+    %1782:sreg_32 = S_CSELECT_B32 1, 0, implicit killed $scc
+    S_CMP_EQ_U64 %607.sub2_sub3, %489, implicit-def $scc
+    %1783:sreg_32 = S_CSELECT_B32 1, 0, implicit killed $scc
+    S_CMP_EQ_U64 %607.sub4_sub5, %494, implicit-def $scc
+    %1784:sreg_32 = S_CSELECT_B32 1, 0, implicit killed $scc
+    S_CMP_EQ_U64 %607.sub6_sub7, %499, implicit-def $scc
+    %1785:sreg_32 = S_CSELECT_B32 1, 0, implicit killed $scc
+    %1788:sreg_32 = S_AND_B32 %1782, %1783, implicit-def dead $scc
+    %1791:sreg_32 = S_AND_B32 %1784, %1788, implicit-def dead $scc
+    %1794:sreg_32 = S_AND_B32 %1785, %1791, implicit-def dead $scc
+    %1799:sreg_32 = S_AND_B32 %1794, 1, implicit-def dead $scc
+    $scc = COPY %1799
+    %608:sreg_32 = S_CSELECT_B32 1, 0, implicit killed $scc
+    %518:sreg_32 = S_LSHL_B32 %608, 1, implicit-def dead $scc
+    %519:sreg_32 = S_OR_B32 %518, %288, implicit-def dead $scc
+    %1968:vgpr_32 = COPY %519
+    S_BRANCH %bb.42
+
+
+  bb.83:
+    %521:sreg_64_xexec = S_LOAD_DWORDX2_IMM %1, 0, 0 :: (invariant load (<2 x s32>), align 16, addrspace 4)
+    %530:sreg_32 = S_LSHL_B32 %8, 6, implicit-def dead $scc
+    %531:vgpr_32 = V_ADD_U32_e64 %530, %11, 0, implicit $exec
+    %532:sreg_32 = S_LSHL_B32 %521.sub0, 6, implicit-def dead $scc
+    undef %1829.sub0:vreg_64 = COPY %9
+    %1832:vreg_64, dead %1831:sreg_64 = V_MAD_U64_U32_e64 %521.sub1, %10, %1829, 0, implicit $exec
+    %1809:vgpr_32 = V_MUL_LO_U32_e64 %1832.sub0, %532, implicit $exec
+    %537:sgpr_128 = S_LOAD_DWORDX4_IMM %19, 0, 0 :: (invariant load (<4 x s32>), addrspace 4)
+    %538:vgpr_32 = V_ADD_LSHL_U32_e64 %1809, %531, 2, implicit $exec
+    BUFFER_STORE_DWORD_OFFEN_exact %1968, %538, %537, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
+    S_ENDPGM 0
+
+  bb.42:
+    successors: %bb.83(0x80000000)
+
+    $exec = S_OR_B64 $exec, %297, implicit-def $scc
+    S_BRANCH %bb.83
+
+...

>From de4b61add6ffff537c496bfae446f06b7aee87c1 Mon Sep 17 00:00:00 2001
From: Petar Avramovic <Petar.Avramovic at amd.com>
Date: Wed, 17 Apr 2024 13:07:27 +0200
Subject: [PATCH 2/2] RenameIndependentSubregs: Add missing sub-range for new
 IMPLICIT_DEFs

Existing sub-ranges are correctly updated when the new IMPLICIT_DEF is
added, but the sub-range for the IMPLICIT_DEF itself is missing.
Because that sub-range is missing from the live interval, the register
allocator does not know that the IMPLICIT_DEF overwrites its virtual
sub-registers, and it can end up assigning overlapping physical
registers to them.
As a result, the instructions that define the sub-registers overwritten
by the IMPLICIT_DEF are deleted, because they now appear to be dead.
---
 llvm/lib/CodeGen/RenameIndependentSubregs.cpp |   7 +
 .../GlobalISel/llvm.amdgcn.intersect_ray.ll   | 291 ++++++++++--------
 ...rval-bug-in-rename-independent-subregs.mir | 199 ++++++------
 3 files changed, 263 insertions(+), 234 deletions(-)

diff --git a/llvm/lib/CodeGen/RenameIndependentSubregs.cpp b/llvm/lib/CodeGen/RenameIndependentSubregs.cpp
index bc3ef1c0329a98..e888f290df510d 100644
--- a/llvm/lib/CodeGen/RenameIndependentSubregs.cpp
+++ b/llvm/lib/CodeGen/RenameIndependentSubregs.cpp
@@ -334,10 +334,17 @@ void RenameIndependentSubregs::computeMainRangesFixFlags(
                                                DebugLoc(), MCDesc, Reg);
           SlotIndex DefIdx = LIS->InsertMachineInstrInMaps(*ImpDef);
           SlotIndex RegDefIdx = DefIdx.getRegSlot();
+          LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(Reg);
           for (LiveInterval::SubRange &SR : LI.subranges()) {
+            Mask = Mask & ~SR.LaneMask;
             VNInfo *SRVNI = SR.getNextValue(RegDefIdx, Allocator);
             SR.addSegment(LiveRange::Segment(RegDefIdx, PredEnd, SRVNI));
           }
+
+          if (!Mask.none()) {
+            LiveInterval::SubRange *SR = LI.createSubRange(Allocator, Mask);
+            SR->createDeadDef(RegDefIdx, Allocator);
+          }
         }
       }
     }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll
index 47e476de74cf41..6e96a4ddbc0b3b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll
@@ -144,40 +144,42 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_a16(i64 %node_ptr, float
 define amdgpu_ps <4 x float> @image_bvh_intersect_ray_vgpr_descr(i32 %node_ptr, float %ray_extent, <3 x float> %ray_origin, <3 x float> %ray_dir, <3 x float> %ray_inv_dir, <4 x i32> %tdescr) {
 ; GFX1030-LABEL: image_bvh_intersect_ray_vgpr_descr:
 ; GFX1030:       ; %bb.0:
-; GFX1030-NEXT:    v_mov_b32_e32 v15, v0
-; GFX1030-NEXT:    v_mov_b32_e32 v16, v1
-; GFX1030-NEXT:    v_mov_b32_e32 v17, v2
-; GFX1030-NEXT:    v_mov_b32_e32 v18, v3
-; GFX1030-NEXT:    v_mov_b32_e32 v19, v4
-; GFX1030-NEXT:    v_mov_b32_e32 v20, v5
-; GFX1030-NEXT:    v_mov_b32_e32 v21, v6
-; GFX1030-NEXT:    v_mov_b32_e32 v22, v7
-; GFX1030-NEXT:    v_mov_b32_e32 v23, v8
-; GFX1030-NEXT:    v_mov_b32_e32 v24, v9
-; GFX1030-NEXT:    v_mov_b32_e32 v25, v10
+; GFX1030-NEXT:    v_mov_b32_e32 v21, v0
+; GFX1030-NEXT:    v_mov_b32_e32 v22, v1
+; GFX1030-NEXT:    v_mov_b32_e32 v23, v2
+; GFX1030-NEXT:    v_mov_b32_e32 v24, v3
+; GFX1030-NEXT:    v_mov_b32_e32 v25, v4
+; GFX1030-NEXT:    v_mov_b32_e32 v26, v5
+; GFX1030-NEXT:    v_mov_b32_e32 v27, v6
+; GFX1030-NEXT:    v_mov_b32_e32 v28, v7
+; GFX1030-NEXT:    v_mov_b32_e32 v29, v8
+; GFX1030-NEXT:    v_mov_b32_e32 v30, v9
+; GFX1030-NEXT:    v_mov_b32_e32 v31, v10
+; GFX1030-NEXT:    v_mov_b32_e32 v19, v11
+; GFX1030-NEXT:    v_mov_b32_e32 v20, v12
 ; GFX1030-NEXT:    s_mov_b32 s1, exec_lo
 ; GFX1030-NEXT:  .LBB6_1: ; =>This Inner Loop Header: Depth=1
-; GFX1030-NEXT:    v_readfirstlane_b32 s4, v11
-; GFX1030-NEXT:    v_readfirstlane_b32 s5, v12
+; GFX1030-NEXT:    v_readfirstlane_b32 s4, v19
+; GFX1030-NEXT:    v_readfirstlane_b32 s5, v20
 ; GFX1030-NEXT:    v_readfirstlane_b32 s6, v13
 ; GFX1030-NEXT:    v_readfirstlane_b32 s7, v14
-; GFX1030-NEXT:    v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[11:12]
+; GFX1030-NEXT:    v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[19:20]
 ; GFX1030-NEXT:    v_cmp_eq_u64_e64 s0, s[6:7], v[13:14]
 ; GFX1030-NEXT:    s_and_b32 s0, vcc_lo, s0
 ; GFX1030-NEXT:    s_and_saveexec_b32 s0, s0
-; GFX1030-NEXT:    image_bvh_intersect_ray v[0:3], v[15:25], s[4:7]
-; GFX1030-NEXT:    ; implicit-def: $vgpr11
-; GFX1030-NEXT:    ; implicit-def: $vgpr15
-; GFX1030-NEXT:    ; implicit-def: $vgpr16
-; GFX1030-NEXT:    ; implicit-def: $vgpr17
-; GFX1030-NEXT:    ; implicit-def: $vgpr18
+; GFX1030-NEXT:    image_bvh_intersect_ray v[0:3], v[21:31], s[4:7]
 ; GFX1030-NEXT:    ; implicit-def: $vgpr19
-; GFX1030-NEXT:    ; implicit-def: $vgpr20
 ; GFX1030-NEXT:    ; implicit-def: $vgpr21
 ; GFX1030-NEXT:    ; implicit-def: $vgpr22
 ; GFX1030-NEXT:    ; implicit-def: $vgpr23
 ; GFX1030-NEXT:    ; implicit-def: $vgpr24
 ; GFX1030-NEXT:    ; implicit-def: $vgpr25
+; GFX1030-NEXT:    ; implicit-def: $vgpr26
+; GFX1030-NEXT:    ; implicit-def: $vgpr27
+; GFX1030-NEXT:    ; implicit-def: $vgpr28
+; GFX1030-NEXT:    ; implicit-def: $vgpr29
+; GFX1030-NEXT:    ; implicit-def: $vgpr30
+; GFX1030-NEXT:    ; implicit-def: $vgpr31
 ; GFX1030-NEXT:    ; implicit-def: $vgpr11_vgpr12_vgpr13_vgpr14
 ; GFX1030-NEXT:    s_xor_b32 exec_lo, exec_lo, s0
 ; GFX1030-NEXT:    s_cbranch_execnz .LBB6_1
@@ -188,18 +190,20 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_vgpr_descr(i32 %node_ptr,
 ;
 ; GFX1013-LABEL: image_bvh_intersect_ray_vgpr_descr:
 ; GFX1013:       ; %bb.0:
+; GFX1013-NEXT:    v_mov_b32_e32 v19, v11
+; GFX1013-NEXT:    v_mov_b32_e32 v20, v12
 ; GFX1013-NEXT:    s_mov_b32 s1, exec_lo
 ; GFX1013-NEXT:  .LBB6_1: ; =>This Inner Loop Header: Depth=1
-; GFX1013-NEXT:    v_readfirstlane_b32 s4, v11
-; GFX1013-NEXT:    v_readfirstlane_b32 s5, v12
+; GFX1013-NEXT:    v_readfirstlane_b32 s4, v19
+; GFX1013-NEXT:    v_readfirstlane_b32 s5, v20
 ; GFX1013-NEXT:    v_readfirstlane_b32 s6, v13
 ; GFX1013-NEXT:    v_readfirstlane_b32 s7, v14
-; GFX1013-NEXT:    v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[11:12]
+; GFX1013-NEXT:    v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[19:20]
 ; GFX1013-NEXT:    v_cmp_eq_u64_e64 s0, s[6:7], v[13:14]
 ; GFX1013-NEXT:    s_and_b32 s0, vcc_lo, s0
 ; GFX1013-NEXT:    s_and_saveexec_b32 s0, s0
 ; GFX1013-NEXT:    image_bvh_intersect_ray v[15:18], v[0:10], s[4:7]
-; GFX1013-NEXT:    ; implicit-def: $vgpr11
+; GFX1013-NEXT:    ; implicit-def: $vgpr19
 ; GFX1013-NEXT:    ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10
 ; GFX1013-NEXT:    ; implicit-def: $vgpr11_vgpr12_vgpr13_vgpr14
 ; GFX1013-NEXT:    s_waitcnt_depctr 0xffe3
@@ -216,25 +220,27 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_vgpr_descr(i32 %node_ptr,
 ;
 ; GFX11-LABEL: image_bvh_intersect_ray_vgpr_descr:
 ; GFX11:       ; %bb.0:
-; GFX11-NEXT:    v_dual_mov_b32 v18, v0 :: v_dual_mov_b32 v19, v1
+; GFX11-NEXT:    v_dual_mov_b32 v20, v0 :: v_dual_mov_b32 v21, v1
 ; GFX11-NEXT:    v_dual_mov_b32 v15, v2 :: v_dual_mov_b32 v16, v3
-; GFX11-NEXT:    v_mov_b32_e32 v17, v4
+; GFX11-NEXT:    v_dual_mov_b32 v17, v4 :: v_dual_mov_b32 v18, v11
+; GFX11-NEXT:    v_mov_b32_e32 v19, v12
 ; GFX11-NEXT:    s_mov_b32 s1, exec_lo
 ; GFX11-NEXT:  .LBB6_1: ; =>This Inner Loop Header: Depth=1
-; GFX11-NEXT:    v_readfirstlane_b32 s4, v11
-; GFX11-NEXT:    v_readfirstlane_b32 s5, v12
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_readfirstlane_b32 s4, v18
+; GFX11-NEXT:    v_readfirstlane_b32 s5, v19
 ; GFX11-NEXT:    v_readfirstlane_b32 s6, v13
 ; GFX11-NEXT:    v_readfirstlane_b32 s7, v14
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[11:12]
+; GFX11-NEXT:    v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[18:19]
 ; GFX11-NEXT:    v_cmp_eq_u64_e64 s0, s[6:7], v[13:14]
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
 ; GFX11-NEXT:    s_and_b32 s0, vcc_lo, s0
 ; GFX11-NEXT:    s_and_saveexec_b32 s0, s0
-; GFX11-NEXT:    image_bvh_intersect_ray v[0:3], [v18, v19, v[15:17], v[5:7], v[8:10]], s[4:7]
-; GFX11-NEXT:    ; implicit-def: $vgpr11
+; GFX11-NEXT:    image_bvh_intersect_ray v[0:3], [v20, v21, v[15:17], v[5:7], v[8:10]], s[4:7]
 ; GFX11-NEXT:    ; implicit-def: $vgpr18
-; GFX11-NEXT:    ; implicit-def: $vgpr19
+; GFX11-NEXT:    ; implicit-def: $vgpr20
+; GFX11-NEXT:    ; implicit-def: $vgpr21
 ; GFX11-NEXT:    ; implicit-def: $vgpr15_vgpr16_vgpr17
 ; GFX11-NEXT:    ; implicit-def: $vgpr5_vgpr6_vgpr7
 ; GFX11-NEXT:    ; implicit-def: $vgpr8_vgpr9_vgpr10
@@ -253,39 +259,41 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_vgpr_descr(i32 %node_ptr,
 define amdgpu_ps <4 x float> @image_bvh_intersect_ray_a16_vgpr_descr(i32 %node_ptr, float %ray_extent, <3 x float> %ray_origin, <3 x half> %ray_dir, <3 x half> %ray_inv_dir, <4 x i32> %tdescr) {
 ; GFX1030-LABEL: image_bvh_intersect_ray_a16_vgpr_descr:
 ; GFX1030:       ; %bb.0:
-; GFX1030-NEXT:    v_mov_b32_e32 v13, v0
-; GFX1030-NEXT:    v_mov_b32_e32 v14, v1
+; GFX1030-NEXT:    v_mov_b32_e32 v18, v0
+; GFX1030-NEXT:    v_mov_b32_e32 v19, v1
 ; GFX1030-NEXT:    v_lshrrev_b32_e32 v0, 16, v5
 ; GFX1030-NEXT:    v_and_b32_e32 v1, 0xffff, v7
-; GFX1030-NEXT:    v_mov_b32_e32 v15, v2
+; GFX1030-NEXT:    v_mov_b32_e32 v20, v2
 ; GFX1030-NEXT:    v_and_b32_e32 v2, 0xffff, v8
-; GFX1030-NEXT:    v_mov_b32_e32 v16, v3
+; GFX1030-NEXT:    v_mov_b32_e32 v21, v3
 ; GFX1030-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
 ; GFX1030-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
-; GFX1030-NEXT:    v_mov_b32_e32 v17, v4
-; GFX1030-NEXT:    v_alignbit_b32 v20, v2, v7, 16
+; GFX1030-NEXT:    v_mov_b32_e32 v22, v4
+; GFX1030-NEXT:    v_mov_b32_e32 v16, v9
+; GFX1030-NEXT:    v_mov_b32_e32 v17, v10
+; GFX1030-NEXT:    v_and_or_b32 v23, 0xffff, v5, v0
+; GFX1030-NEXT:    v_and_or_b32 v24, 0xffff, v6, v1
+; GFX1030-NEXT:    v_alignbit_b32 v25, v2, v7, 16
 ; GFX1030-NEXT:    s_mov_b32 s1, exec_lo
-; GFX1030-NEXT:    v_and_or_b32 v18, 0xffff, v5, v0
-; GFX1030-NEXT:    v_and_or_b32 v19, 0xffff, v6, v1
 ; GFX1030-NEXT:  .LBB7_1: ; =>This Inner Loop Header: Depth=1
-; GFX1030-NEXT:    v_readfirstlane_b32 s4, v9
-; GFX1030-NEXT:    v_readfirstlane_b32 s5, v10
+; GFX1030-NEXT:    v_readfirstlane_b32 s4, v16
+; GFX1030-NEXT:    v_readfirstlane_b32 s5, v17
 ; GFX1030-NEXT:    v_readfirstlane_b32 s6, v11
 ; GFX1030-NEXT:    v_readfirstlane_b32 s7, v12
-; GFX1030-NEXT:    v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[9:10]
+; GFX1030-NEXT:    v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[16:17]
 ; GFX1030-NEXT:    v_cmp_eq_u64_e64 s0, s[6:7], v[11:12]
 ; GFX1030-NEXT:    s_and_b32 s0, vcc_lo, s0
 ; GFX1030-NEXT:    s_and_saveexec_b32 s0, s0
-; GFX1030-NEXT:    image_bvh_intersect_ray v[0:3], v[13:20], s[4:7] a16
-; GFX1030-NEXT:    ; implicit-def: $vgpr9
-; GFX1030-NEXT:    ; implicit-def: $vgpr13
-; GFX1030-NEXT:    ; implicit-def: $vgpr14
-; GFX1030-NEXT:    ; implicit-def: $vgpr15
+; GFX1030-NEXT:    image_bvh_intersect_ray v[0:3], v[18:25], s[4:7] a16
 ; GFX1030-NEXT:    ; implicit-def: $vgpr16
-; GFX1030-NEXT:    ; implicit-def: $vgpr17
 ; GFX1030-NEXT:    ; implicit-def: $vgpr18
 ; GFX1030-NEXT:    ; implicit-def: $vgpr19
 ; GFX1030-NEXT:    ; implicit-def: $vgpr20
+; GFX1030-NEXT:    ; implicit-def: $vgpr21
+; GFX1030-NEXT:    ; implicit-def: $vgpr22
+; GFX1030-NEXT:    ; implicit-def: $vgpr23
+; GFX1030-NEXT:    ; implicit-def: $vgpr24
+; GFX1030-NEXT:    ; implicit-def: $vgpr25
 ; GFX1030-NEXT:    ; implicit-def: $vgpr9_vgpr10_vgpr11_vgpr12
 ; GFX1030-NEXT:    s_xor_b32 exec_lo, exec_lo, s0
 ; GFX1030-NEXT:    s_cbranch_execnz .LBB7_1
@@ -296,26 +304,28 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_a16_vgpr_descr(i32 %node_p
 ;
 ; GFX1013-LABEL: image_bvh_intersect_ray_a16_vgpr_descr:
 ; GFX1013:       ; %bb.0:
-; GFX1013-NEXT:    v_lshrrev_b32_e32 v13, 16, v5
-; GFX1013-NEXT:    v_and_b32_e32 v14, 0xffff, v7
+; GFX1013-NEXT:    v_mov_b32_e32 v17, v9
+; GFX1013-NEXT:    v_mov_b32_e32 v18, v10
+; GFX1013-NEXT:    v_lshrrev_b32_e32 v9, 16, v5
+; GFX1013-NEXT:    v_and_b32_e32 v10, 0xffff, v7
 ; GFX1013-NEXT:    v_and_b32_e32 v8, 0xffff, v8
 ; GFX1013-NEXT:    s_mov_b32 s1, exec_lo
-; GFX1013-NEXT:    v_lshlrev_b32_e32 v13, 16, v13
-; GFX1013-NEXT:    v_lshlrev_b32_e32 v14, 16, v14
+; GFX1013-NEXT:    v_lshlrev_b32_e32 v9, 16, v9
+; GFX1013-NEXT:    v_lshlrev_b32_e32 v10, 16, v10
 ; GFX1013-NEXT:    v_alignbit_b32 v7, v8, v7, 16
-; GFX1013-NEXT:    v_and_or_b32 v5, 0xffff, v5, v13
-; GFX1013-NEXT:    v_and_or_b32 v6, 0xffff, v6, v14
+; GFX1013-NEXT:    v_and_or_b32 v5, 0xffff, v5, v9
+; GFX1013-NEXT:    v_and_or_b32 v6, 0xffff, v6, v10
 ; GFX1013-NEXT:  .LBB7_1: ; =>This Inner Loop Header: Depth=1
-; GFX1013-NEXT:    v_readfirstlane_b32 s4, v9
-; GFX1013-NEXT:    v_readfirstlane_b32 s5, v10
+; GFX1013-NEXT:    v_readfirstlane_b32 s4, v17
+; GFX1013-NEXT:    v_readfirstlane_b32 s5, v18
 ; GFX1013-NEXT:    v_readfirstlane_b32 s6, v11
 ; GFX1013-NEXT:    v_readfirstlane_b32 s7, v12
-; GFX1013-NEXT:    v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[9:10]
+; GFX1013-NEXT:    v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[17:18]
 ; GFX1013-NEXT:    v_cmp_eq_u64_e64 s0, s[6:7], v[11:12]
 ; GFX1013-NEXT:    s_and_b32 s0, vcc_lo, s0
 ; GFX1013-NEXT:    s_and_saveexec_b32 s0, s0
 ; GFX1013-NEXT:    image_bvh_intersect_ray v[13:16], v[0:7], s[4:7] a16
-; GFX1013-NEXT:    ; implicit-def: $vgpr9
+; GFX1013-NEXT:    ; implicit-def: $vgpr17
 ; GFX1013-NEXT:    ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
 ; GFX1013-NEXT:    ; implicit-def: $vgpr9_vgpr10_vgpr11_vgpr12
 ; GFX1013-NEXT:    s_waitcnt_depctr 0xffe3
@@ -333,27 +343,28 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_a16_vgpr_descr(i32 %node_p
 ; GFX11-LABEL: image_bvh_intersect_ray_a16_vgpr_descr:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    v_dual_mov_b32 v16, v0 :: v_dual_mov_b32 v17, v1
-; GFX11-NEXT:    v_dual_mov_b32 v15, v4 :: v_dual_and_b32 v0, 0xffff, v7
+; GFX11-NEXT:    v_dual_mov_b32 v19, v10 :: v_dual_and_b32 v0, 0xffff, v7
 ; GFX11-NEXT:    v_and_b32_e32 v1, 0xffff, v8
 ; GFX11-NEXT:    v_dual_mov_b32 v13, v2 :: v_dual_mov_b32 v14, v3
-; GFX11-NEXT:    s_mov_b32 s1, exec_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3)
+; GFX11-NEXT:    v_dual_mov_b32 v15, v4 :: v_dual_mov_b32 v18, v9
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4)
 ; GFX11-NEXT:    v_lshl_or_b32 v4, v5, 16, v0
 ; GFX11-NEXT:    v_perm_b32 v5, v5, v7, 0x7060302
 ; GFX11-NEXT:    v_lshl_or_b32 v6, v6, 16, v1
+; GFX11-NEXT:    s_mov_b32 s1, exec_lo
 ; GFX11-NEXT:  .LBB7_1: ; =>This Inner Loop Header: Depth=1
-; GFX11-NEXT:    v_readfirstlane_b32 s4, v9
-; GFX11-NEXT:    v_readfirstlane_b32 s5, v10
+; GFX11-NEXT:    v_readfirstlane_b32 s4, v18
+; GFX11-NEXT:    v_readfirstlane_b32 s5, v19
 ; GFX11-NEXT:    v_readfirstlane_b32 s6, v11
 ; GFX11-NEXT:    v_readfirstlane_b32 s7, v12
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[9:10]
+; GFX11-NEXT:    v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[18:19]
 ; GFX11-NEXT:    v_cmp_eq_u64_e64 s0, s[6:7], v[11:12]
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
 ; GFX11-NEXT:    s_and_b32 s0, vcc_lo, s0
 ; GFX11-NEXT:    s_and_saveexec_b32 s0, s0
 ; GFX11-NEXT:    image_bvh_intersect_ray v[0:3], [v16, v17, v[13:15], v[4:6]], s[4:7] a16
-; GFX11-NEXT:    ; implicit-def: $vgpr9
+; GFX11-NEXT:    ; implicit-def: $vgpr18
 ; GFX11-NEXT:    ; implicit-def: $vgpr16
 ; GFX11-NEXT:    ; implicit-def: $vgpr17
 ; GFX11-NEXT:    ; implicit-def: $vgpr13_vgpr14_vgpr15
@@ -373,42 +384,44 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_a16_vgpr_descr(i32 %node_p
 define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_vgpr_descr(i64 %node_ptr, float %ray_extent, <3 x float> %ray_origin, <3 x float> %ray_dir, <3 x float> %ray_inv_dir, <4 x i32> %tdescr) {
 ; GFX1030-LABEL: image_bvh64_intersect_ray_vgpr_descr:
 ; GFX1030:       ; %bb.0:
-; GFX1030-NEXT:    v_mov_b32_e32 v16, v0
-; GFX1030-NEXT:    v_mov_b32_e32 v17, v1
-; GFX1030-NEXT:    v_mov_b32_e32 v18, v2
-; GFX1030-NEXT:    v_mov_b32_e32 v19, v3
-; GFX1030-NEXT:    v_mov_b32_e32 v20, v4
-; GFX1030-NEXT:    v_mov_b32_e32 v21, v5
-; GFX1030-NEXT:    v_mov_b32_e32 v22, v6
-; GFX1030-NEXT:    v_mov_b32_e32 v23, v7
-; GFX1030-NEXT:    v_mov_b32_e32 v24, v8
-; GFX1030-NEXT:    v_mov_b32_e32 v25, v9
-; GFX1030-NEXT:    v_mov_b32_e32 v26, v10
-; GFX1030-NEXT:    v_mov_b32_e32 v27, v11
+; GFX1030-NEXT:    v_mov_b32_e32 v22, v0
+; GFX1030-NEXT:    v_mov_b32_e32 v23, v1
+; GFX1030-NEXT:    v_mov_b32_e32 v24, v2
+; GFX1030-NEXT:    v_mov_b32_e32 v25, v3
+; GFX1030-NEXT:    v_mov_b32_e32 v26, v4
+; GFX1030-NEXT:    v_mov_b32_e32 v27, v5
+; GFX1030-NEXT:    v_mov_b32_e32 v28, v6
+; GFX1030-NEXT:    v_mov_b32_e32 v29, v7
+; GFX1030-NEXT:    v_mov_b32_e32 v30, v8
+; GFX1030-NEXT:    v_mov_b32_e32 v31, v9
+; GFX1030-NEXT:    v_mov_b32_e32 v32, v10
+; GFX1030-NEXT:    v_mov_b32_e32 v33, v11
+; GFX1030-NEXT:    v_mov_b32_e32 v20, v12
+; GFX1030-NEXT:    v_mov_b32_e32 v21, v13
 ; GFX1030-NEXT:    s_mov_b32 s1, exec_lo
 ; GFX1030-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
-; GFX1030-NEXT:    v_readfirstlane_b32 s4, v12
-; GFX1030-NEXT:    v_readfirstlane_b32 s5, v13
+; GFX1030-NEXT:    v_readfirstlane_b32 s4, v20
+; GFX1030-NEXT:    v_readfirstlane_b32 s5, v21
 ; GFX1030-NEXT:    v_readfirstlane_b32 s6, v14
 ; GFX1030-NEXT:    v_readfirstlane_b32 s7, v15
-; GFX1030-NEXT:    v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[12:13]
+; GFX1030-NEXT:    v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[20:21]
 ; GFX1030-NEXT:    v_cmp_eq_u64_e64 s0, s[6:7], v[14:15]
 ; GFX1030-NEXT:    s_and_b32 s0, vcc_lo, s0
 ; GFX1030-NEXT:    s_and_saveexec_b32 s0, s0
-; GFX1030-NEXT:    image_bvh64_intersect_ray v[0:3], v[16:27], s[4:7]
-; GFX1030-NEXT:    ; implicit-def: $vgpr12
-; GFX1030-NEXT:    ; implicit-def: $vgpr16
-; GFX1030-NEXT:    ; implicit-def: $vgpr17
-; GFX1030-NEXT:    ; implicit-def: $vgpr18
-; GFX1030-NEXT:    ; implicit-def: $vgpr19
+; GFX1030-NEXT:    image_bvh64_intersect_ray v[0:3], v[22:33], s[4:7]
 ; GFX1030-NEXT:    ; implicit-def: $vgpr20
-; GFX1030-NEXT:    ; implicit-def: $vgpr21
 ; GFX1030-NEXT:    ; implicit-def: $vgpr22
 ; GFX1030-NEXT:    ; implicit-def: $vgpr23
 ; GFX1030-NEXT:    ; implicit-def: $vgpr24
 ; GFX1030-NEXT:    ; implicit-def: $vgpr25
 ; GFX1030-NEXT:    ; implicit-def: $vgpr26
 ; GFX1030-NEXT:    ; implicit-def: $vgpr27
+; GFX1030-NEXT:    ; implicit-def: $vgpr28
+; GFX1030-NEXT:    ; implicit-def: $vgpr29
+; GFX1030-NEXT:    ; implicit-def: $vgpr30
+; GFX1030-NEXT:    ; implicit-def: $vgpr31
+; GFX1030-NEXT:    ; implicit-def: $vgpr32
+; GFX1030-NEXT:    ; implicit-def: $vgpr33
 ; GFX1030-NEXT:    ; implicit-def: $vgpr12_vgpr13_vgpr14_vgpr15
 ; GFX1030-NEXT:    s_xor_b32 exec_lo, exec_lo, s0
 ; GFX1030-NEXT:    s_cbranch_execnz .LBB8_1
@@ -419,18 +432,20 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_vgpr_descr(i64 %node_ptr
 ;
 ; GFX1013-LABEL: image_bvh64_intersect_ray_vgpr_descr:
 ; GFX1013:       ; %bb.0:
+; GFX1013-NEXT:    v_mov_b32_e32 v20, v12
+; GFX1013-NEXT:    v_mov_b32_e32 v21, v13
 ; GFX1013-NEXT:    s_mov_b32 s1, exec_lo
 ; GFX1013-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
-; GFX1013-NEXT:    v_readfirstlane_b32 s4, v12
-; GFX1013-NEXT:    v_readfirstlane_b32 s5, v13
+; GFX1013-NEXT:    v_readfirstlane_b32 s4, v20
+; GFX1013-NEXT:    v_readfirstlane_b32 s5, v21
 ; GFX1013-NEXT:    v_readfirstlane_b32 s6, v14
 ; GFX1013-NEXT:    v_readfirstlane_b32 s7, v15
-; GFX1013-NEXT:    v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[12:13]
+; GFX1013-NEXT:    v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[20:21]
 ; GFX1013-NEXT:    v_cmp_eq_u64_e64 s0, s[6:7], v[14:15]
 ; GFX1013-NEXT:    s_and_b32 s0, vcc_lo, s0
 ; GFX1013-NEXT:    s_and_saveexec_b32 s0, s0
 ; GFX1013-NEXT:    image_bvh64_intersect_ray v[16:19], v[0:11], s[4:7]
-; GFX1013-NEXT:    ; implicit-def: $vgpr12
+; GFX1013-NEXT:    ; implicit-def: $vgpr20
 ; GFX1013-NEXT:    ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11
 ; GFX1013-NEXT:    ; implicit-def: $vgpr12_vgpr13_vgpr14_vgpr15
 ; GFX1013-NEXT:    s_waitcnt_depctr 0xffe3
@@ -450,20 +465,22 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_vgpr_descr(i64 %node_ptr
 ; GFX11-NEXT:    v_dual_mov_b32 v19, v0 :: v_dual_mov_b32 v20, v1
 ; GFX11-NEXT:    v_dual_mov_b32 v21, v2 :: v_dual_mov_b32 v16, v3
 ; GFX11-NEXT:    v_dual_mov_b32 v17, v4 :: v_dual_mov_b32 v18, v5
+; GFX11-NEXT:    v_dual_mov_b32 v4, v12 :: v_dual_mov_b32 v5, v13
 ; GFX11-NEXT:    s_mov_b32 s1, exec_lo
 ; GFX11-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
-; GFX11-NEXT:    v_readfirstlane_b32 s4, v12
-; GFX11-NEXT:    v_readfirstlane_b32 s5, v13
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_readfirstlane_b32 s4, v4
+; GFX11-NEXT:    v_readfirstlane_b32 s5, v5
 ; GFX11-NEXT:    v_readfirstlane_b32 s6, v14
 ; GFX11-NEXT:    v_readfirstlane_b32 s7, v15
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[12:13]
+; GFX11-NEXT:    v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[4:5]
 ; GFX11-NEXT:    v_cmp_eq_u64_e64 s0, s[6:7], v[14:15]
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
 ; GFX11-NEXT:    s_and_b32 s0, vcc_lo, s0
 ; GFX11-NEXT:    s_and_saveexec_b32 s0, s0
 ; GFX11-NEXT:    image_bvh64_intersect_ray v[0:3], [v[19:20], v21, v[16:18], v[6:8], v[9:11]], s[4:7]
-; GFX11-NEXT:    ; implicit-def: $vgpr12
+; GFX11-NEXT:    ; implicit-def: $vgpr4
 ; GFX11-NEXT:    ; implicit-def: $vgpr19_vgpr20
 ; GFX11-NEXT:    ; implicit-def: $vgpr21
 ; GFX11-NEXT:    ; implicit-def: $vgpr16_vgpr17_vgpr18
@@ -484,41 +501,43 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_vgpr_descr(i64 %node_ptr
 define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_a16_vgpr_descr(i64 %node_ptr, float %ray_extent, <3 x float> %ray_origin, <3 x half> %ray_dir, <3 x half> %ray_inv_dir, <4 x i32> %tdescr) {
 ; GFX1030-LABEL: image_bvh64_intersect_ray_a16_vgpr_descr:
 ; GFX1030:       ; %bb.0:
-; GFX1030-NEXT:    v_mov_b32_e32 v14, v0
-; GFX1030-NEXT:    v_mov_b32_e32 v15, v1
+; GFX1030-NEXT:    v_mov_b32_e32 v19, v0
+; GFX1030-NEXT:    v_mov_b32_e32 v20, v1
 ; GFX1030-NEXT:    v_lshrrev_b32_e32 v0, 16, v6
 ; GFX1030-NEXT:    v_and_b32_e32 v1, 0xffff, v8
-; GFX1030-NEXT:    v_mov_b32_e32 v16, v2
+; GFX1030-NEXT:    v_mov_b32_e32 v21, v2
 ; GFX1030-NEXT:    v_and_b32_e32 v2, 0xffff, v9
-; GFX1030-NEXT:    v_mov_b32_e32 v17, v3
+; GFX1030-NEXT:    v_mov_b32_e32 v22, v3
 ; GFX1030-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
 ; GFX1030-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
-; GFX1030-NEXT:    v_mov_b32_e32 v18, v4
-; GFX1030-NEXT:    v_mov_b32_e32 v19, v5
-; GFX1030-NEXT:    v_alignbit_b32 v22, v2, v8, 16
-; GFX1030-NEXT:    v_and_or_b32 v20, 0xffff, v6, v0
-; GFX1030-NEXT:    v_and_or_b32 v21, 0xffff, v7, v1
+; GFX1030-NEXT:    v_mov_b32_e32 v23, v4
+; GFX1030-NEXT:    v_mov_b32_e32 v24, v5
+; GFX1030-NEXT:    v_mov_b32_e32 v17, v10
+; GFX1030-NEXT:    v_mov_b32_e32 v18, v11
+; GFX1030-NEXT:    v_and_or_b32 v25, 0xffff, v6, v0
+; GFX1030-NEXT:    v_and_or_b32 v26, 0xffff, v7, v1
+; GFX1030-NEXT:    v_alignbit_b32 v27, v2, v8, 16
 ; GFX1030-NEXT:    s_mov_b32 s1, exec_lo
 ; GFX1030-NEXT:  .LBB9_1: ; =>This Inner Loop Header: Depth=1
-; GFX1030-NEXT:    v_readfirstlane_b32 s4, v10
-; GFX1030-NEXT:    v_readfirstlane_b32 s5, v11
+; GFX1030-NEXT:    v_readfirstlane_b32 s4, v17
+; GFX1030-NEXT:    v_readfirstlane_b32 s5, v18
 ; GFX1030-NEXT:    v_readfirstlane_b32 s6, v12
 ; GFX1030-NEXT:    v_readfirstlane_b32 s7, v13
-; GFX1030-NEXT:    v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[10:11]
+; GFX1030-NEXT:    v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[17:18]
 ; GFX1030-NEXT:    v_cmp_eq_u64_e64 s0, s[6:7], v[12:13]
 ; GFX1030-NEXT:    s_and_b32 s0, vcc_lo, s0
 ; GFX1030-NEXT:    s_and_saveexec_b32 s0, s0
-; GFX1030-NEXT:    image_bvh64_intersect_ray v[0:3], v[14:22], s[4:7] a16
-; GFX1030-NEXT:    ; implicit-def: $vgpr10
-; GFX1030-NEXT:    ; implicit-def: $vgpr14
-; GFX1030-NEXT:    ; implicit-def: $vgpr15
-; GFX1030-NEXT:    ; implicit-def: $vgpr16
+; GFX1030-NEXT:    image_bvh64_intersect_ray v[0:3], v[19:27], s[4:7] a16
 ; GFX1030-NEXT:    ; implicit-def: $vgpr17
-; GFX1030-NEXT:    ; implicit-def: $vgpr18
 ; GFX1030-NEXT:    ; implicit-def: $vgpr19
 ; GFX1030-NEXT:    ; implicit-def: $vgpr20
 ; GFX1030-NEXT:    ; implicit-def: $vgpr21
 ; GFX1030-NEXT:    ; implicit-def: $vgpr22
+; GFX1030-NEXT:    ; implicit-def: $vgpr23
+; GFX1030-NEXT:    ; implicit-def: $vgpr24
+; GFX1030-NEXT:    ; implicit-def: $vgpr25
+; GFX1030-NEXT:    ; implicit-def: $vgpr26
+; GFX1030-NEXT:    ; implicit-def: $vgpr27
 ; GFX1030-NEXT:    ; implicit-def: $vgpr10_vgpr11_vgpr12_vgpr13
 ; GFX1030-NEXT:    s_xor_b32 exec_lo, exec_lo, s0
 ; GFX1030-NEXT:    s_cbranch_execnz .LBB9_1
@@ -529,26 +548,28 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_a16_vgpr_descr(i64 %node
 ;
 ; GFX1013-LABEL: image_bvh64_intersect_ray_a16_vgpr_descr:
 ; GFX1013:       ; %bb.0:
-; GFX1013-NEXT:    v_lshrrev_b32_e32 v14, 16, v6
-; GFX1013-NEXT:    v_and_b32_e32 v15, 0xffff, v8
+; GFX1013-NEXT:    v_mov_b32_e32 v18, v10
+; GFX1013-NEXT:    v_mov_b32_e32 v19, v11
+; GFX1013-NEXT:    v_lshrrev_b32_e32 v10, 16, v6
+; GFX1013-NEXT:    v_and_b32_e32 v11, 0xffff, v8
 ; GFX1013-NEXT:    v_and_b32_e32 v9, 0xffff, v9
 ; GFX1013-NEXT:    s_mov_b32 s1, exec_lo
-; GFX1013-NEXT:    v_lshlrev_b32_e32 v14, 16, v14
-; GFX1013-NEXT:    v_lshlrev_b32_e32 v15, 16, v15
+; GFX1013-NEXT:    v_lshlrev_b32_e32 v10, 16, v10
+; GFX1013-NEXT:    v_lshlrev_b32_e32 v11, 16, v11
 ; GFX1013-NEXT:    v_alignbit_b32 v8, v9, v8, 16
-; GFX1013-NEXT:    v_and_or_b32 v6, 0xffff, v6, v14
-; GFX1013-NEXT:    v_and_or_b32 v7, 0xffff, v7, v15
+; GFX1013-NEXT:    v_and_or_b32 v6, 0xffff, v6, v10
+; GFX1013-NEXT:    v_and_or_b32 v7, 0xffff, v7, v11
 ; GFX1013-NEXT:  .LBB9_1: ; =>This Inner Loop Header: Depth=1
-; GFX1013-NEXT:    v_readfirstlane_b32 s4, v10
-; GFX1013-NEXT:    v_readfirstlane_b32 s5, v11
+; GFX1013-NEXT:    v_readfirstlane_b32 s4, v18
+; GFX1013-NEXT:    v_readfirstlane_b32 s5, v19
 ; GFX1013-NEXT:    v_readfirstlane_b32 s6, v12
 ; GFX1013-NEXT:    v_readfirstlane_b32 s7, v13
-; GFX1013-NEXT:    v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[10:11]
+; GFX1013-NEXT:    v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[18:19]
 ; GFX1013-NEXT:    v_cmp_eq_u64_e64 s0, s[6:7], v[12:13]
 ; GFX1013-NEXT:    s_and_b32 s0, vcc_lo, s0
 ; GFX1013-NEXT:    s_and_saveexec_b32 s0, s0
 ; GFX1013-NEXT:    image_bvh64_intersect_ray v[14:17], v[0:8], s[4:7] a16
-; GFX1013-NEXT:    ; implicit-def: $vgpr10
+; GFX1013-NEXT:    ; implicit-def: $vgpr18
 ; GFX1013-NEXT:    ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8
 ; GFX1013-NEXT:    ; implicit-def: $vgpr10_vgpr11_vgpr12_vgpr13
 ; GFX1013-NEXT:    s_waitcnt_depctr 0xffe3
@@ -570,28 +591,28 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_a16_vgpr_descr(i64 %node
 ; GFX11-NEXT:    v_and_b32_e32 v1, 0xffff, v9
 ; GFX11-NEXT:    v_dual_mov_b32 v19, v2 :: v_dual_mov_b32 v14, v3
 ; GFX11-NEXT:    v_dual_mov_b32 v15, v4 :: v_dual_mov_b32 v16, v5
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4)
-; GFX11-NEXT:    v_lshl_or_b32 v4, v6, 16, v0
-; GFX11-NEXT:    v_perm_b32 v5, v6, v8, 0x7060302
-; GFX11-NEXT:    v_lshl_or_b32 v6, v7, 16, v1
+; GFX11-NEXT:    v_dual_mov_b32 v4, v10 :: v_dual_mov_b32 v5, v11
+; GFX11-NEXT:    v_lshl_or_b32 v20, v6, 16, v0
+; GFX11-NEXT:    v_perm_b32 v21, v6, v8, 0x7060302
+; GFX11-NEXT:    v_lshl_or_b32 v22, v7, 16, v1
 ; GFX11-NEXT:    s_mov_b32 s1, exec_lo
 ; GFX11-NEXT:  .LBB9_1: ; =>This Inner Loop Header: Depth=1
-; GFX11-NEXT:    v_readfirstlane_b32 s4, v10
-; GFX11-NEXT:    v_readfirstlane_b32 s5, v11
+; GFX11-NEXT:    v_readfirstlane_b32 s4, v4
+; GFX11-NEXT:    v_readfirstlane_b32 s5, v5
 ; GFX11-NEXT:    v_readfirstlane_b32 s6, v12
 ; GFX11-NEXT:    v_readfirstlane_b32 s7, v13
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[10:11]
+; GFX11-NEXT:    v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[4:5]
 ; GFX11-NEXT:    v_cmp_eq_u64_e64 s0, s[6:7], v[12:13]
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
 ; GFX11-NEXT:    s_and_b32 s0, vcc_lo, s0
 ; GFX11-NEXT:    s_and_saveexec_b32 s0, s0
-; GFX11-NEXT:    image_bvh64_intersect_ray v[0:3], [v[17:18], v19, v[14:16], v[4:6]], s[4:7] a16
-; GFX11-NEXT:    ; implicit-def: $vgpr10
+; GFX11-NEXT:    image_bvh64_intersect_ray v[0:3], [v[17:18], v19, v[14:16], v[20:22]], s[4:7] a16
+; GFX11-NEXT:    ; implicit-def: $vgpr4
 ; GFX11-NEXT:    ; implicit-def: $vgpr17_vgpr18
 ; GFX11-NEXT:    ; implicit-def: $vgpr19
 ; GFX11-NEXT:    ; implicit-def: $vgpr14_vgpr15_vgpr16
-; GFX11-NEXT:    ; implicit-def: $vgpr4_vgpr5_vgpr6
+; GFX11-NEXT:    ; implicit-def: $vgpr20_vgpr21_vgpr22
 ; GFX11-NEXT:    ; implicit-def: $vgpr10_vgpr11_vgpr12_vgpr13
 ; GFX11-NEXT:    s_xor_b32 exec_lo, exec_lo, s0
 ; GFX11-NEXT:    s_cbranch_execnz .LBB9_1
diff --git a/llvm/test/CodeGen/AMDGPU/live-interval-bug-in-rename-independent-subregs.mir b/llvm/test/CodeGen/AMDGPU/live-interval-bug-in-rename-independent-subregs.mir
index 9c8244ef1e38b8..17b16d000e39a0 100644
--- a/llvm/test/CodeGen/AMDGPU/live-interval-bug-in-rename-independent-subregs.mir
+++ b/llvm/test/CodeGen/AMDGPU/live-interval-bug-in-rename-independent-subregs.mir
@@ -26,34 +26,34 @@ body: |
   ; RegAlloc-NEXT:   renamable $vgpr1 = COPY renamable $sgpr14
   ; RegAlloc-NEXT:   renamable $vgpr1_vgpr2_vgpr3_vgpr4 = BUFFER_LOAD_DWORDX4_OFFEN killed renamable $vgpr1, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
   ; RegAlloc-NEXT:   renamable $vgpr5 = V_MBCNT_LO_U32_B32_e64 -1, 0, implicit $exec
-  ; RegAlloc-NEXT:   renamable $vgpr16 = V_MBCNT_HI_U32_B32_e64 -1, killed $vgpr5, implicit $exec
+  ; RegAlloc-NEXT:   renamable $vgpr18 = V_MBCNT_HI_U32_B32_e64 -1, killed $vgpr5, implicit $exec
   ; RegAlloc-NEXT:   renamable $sgpr14 = S_OR_B32 killed renamable $sgpr14, 16, implicit-def dead $scc
-  ; RegAlloc-NEXT:   renamable $vgpr5 = V_LSHLREV_B32_e64 5, $vgpr16, implicit $exec
+  ; RegAlloc-NEXT:   renamable $vgpr5 = V_LSHLREV_B32_e64 5, $vgpr18, implicit $exec
   ; RegAlloc-NEXT:   renamable $vgpr6 = COPY killed renamable $sgpr14
   ; RegAlloc-NEXT:   renamable $vgpr7 = V_OR_B32_e64 16, $vgpr5, implicit $exec
-  ; RegAlloc-NEXT:   renamable $vgpr12_vgpr13_vgpr14_vgpr15 = BUFFER_LOAD_DWORDX4_OFFEN killed renamable $vgpr6, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
+  ; RegAlloc-NEXT:   renamable $vgpr14_vgpr15_vgpr16_vgpr17 = BUFFER_LOAD_DWORDX4_OFFEN killed renamable $vgpr6, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
   ; RegAlloc-NEXT:   renamable $sgpr14 = V_READFIRSTLANE_B32 $vgpr1, implicit $exec
-  ; RegAlloc-NEXT:   renamable $vgpr8_vgpr9_vgpr10_vgpr11 = BUFFER_LOAD_DWORDX4_OFFEN killed renamable $vgpr5, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
+  ; RegAlloc-NEXT:   renamable $vgpr9_vgpr10_vgpr11_vgpr12 = BUFFER_LOAD_DWORDX4_OFFEN killed renamable $vgpr5, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
   ; RegAlloc-NEXT:   renamable $sgpr15 = V_READFIRSTLANE_B32 $vgpr2, implicit $exec
   ; RegAlloc-NEXT:   renamable $sgpr16 = V_READFIRSTLANE_B32 $vgpr3, implicit $exec
   ; RegAlloc-NEXT:   renamable $sgpr17 = V_READFIRSTLANE_B32 killed $vgpr4, implicit $exec
-  ; RegAlloc-NEXT:   renamable $vgpr4_vgpr5_vgpr6_vgpr7 = BUFFER_LOAD_DWORDX4_OFFEN killed renamable $vgpr7, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
-  ; RegAlloc-NEXT:   renamable $sgpr18 = V_READFIRSTLANE_B32 $vgpr8, implicit $exec
-  ; RegAlloc-NEXT:   renamable $sgpr19 = V_READFIRSTLANE_B32 $vgpr9, implicit $exec
-  ; RegAlloc-NEXT:   renamable $sgpr20 = V_READFIRSTLANE_B32 $vgpr10, implicit $exec
-  ; RegAlloc-NEXT:   renamable $sgpr21 = V_READFIRSTLANE_B32 $vgpr11, implicit $exec
-  ; RegAlloc-NEXT:   renamable $sgpr22 = V_READFIRSTLANE_B32 $vgpr4, implicit $exec
-  ; RegAlloc-NEXT:   renamable $sgpr23 = V_READFIRSTLANE_B32 $vgpr5, implicit $exec
-  ; RegAlloc-NEXT:   renamable $sgpr24 = V_READFIRSTLANE_B32 $vgpr6, implicit $exec
-  ; RegAlloc-NEXT:   renamable $vgpr1 = COPY renamable $vgpr7
-  ; RegAlloc-NEXT:   renamable $sgpr26 = V_READFIRSTLANE_B32 $vgpr12, implicit $exec
-  ; RegAlloc-NEXT:   renamable $sgpr27 = V_READFIRSTLANE_B32 $vgpr13, implicit $exec
-  ; RegAlloc-NEXT:   renamable $sgpr28 = V_READFIRSTLANE_B32 $vgpr14, implicit $exec
+  ; RegAlloc-NEXT:   renamable $vgpr5_vgpr6_vgpr7_vgpr8 = BUFFER_LOAD_DWORDX4_OFFEN killed renamable $vgpr7, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
+  ; RegAlloc-NEXT:   renamable $sgpr18 = V_READFIRSTLANE_B32 $vgpr9, implicit $exec
+  ; RegAlloc-NEXT:   renamable $sgpr19 = V_READFIRSTLANE_B32 $vgpr10, implicit $exec
+  ; RegAlloc-NEXT:   renamable $sgpr20 = V_READFIRSTLANE_B32 $vgpr11, implicit $exec
+  ; RegAlloc-NEXT:   renamable $sgpr21 = V_READFIRSTLANE_B32 $vgpr12, implicit $exec
+  ; RegAlloc-NEXT:   renamable $sgpr22 = V_READFIRSTLANE_B32 $vgpr5, implicit $exec
+  ; RegAlloc-NEXT:   renamable $sgpr23 = V_READFIRSTLANE_B32 $vgpr6, implicit $exec
+  ; RegAlloc-NEXT:   renamable $sgpr24 = V_READFIRSTLANE_B32 $vgpr7, implicit $exec
+  ; RegAlloc-NEXT:   renamable $vgpr13 = COPY renamable $vgpr8
+  ; RegAlloc-NEXT:   renamable $sgpr26 = V_READFIRSTLANE_B32 $vgpr14, implicit $exec
+  ; RegAlloc-NEXT:   renamable $sgpr27 = V_READFIRSTLANE_B32 $vgpr15, implicit $exec
+  ; RegAlloc-NEXT:   renamable $sgpr28 = V_READFIRSTLANE_B32 $vgpr16, implicit $exec
   ; RegAlloc-NEXT:   S_CMP_EQ_U64 killed renamable $sgpr14_sgpr15, killed renamable $sgpr18_sgpr19, implicit-def $scc
-  ; RegAlloc-NEXT:   renamable $sgpr29 = V_READFIRSTLANE_B32 killed $vgpr15, implicit $exec
+  ; RegAlloc-NEXT:   renamable $sgpr29 = V_READFIRSTLANE_B32 killed $vgpr17, implicit $exec
   ; RegAlloc-NEXT:   renamable $sgpr14 = S_CSELECT_B32 1, 0, implicit $scc
   ; RegAlloc-NEXT:   S_CMP_EQ_U64 killed renamable $sgpr16_sgpr17, killed renamable $sgpr20_sgpr21, implicit-def $scc
-  ; RegAlloc-NEXT:   renamable $sgpr25 = V_READFIRSTLANE_B32 $vgpr1, implicit $exec
+  ; RegAlloc-NEXT:   renamable $sgpr25 = V_READFIRSTLANE_B32 $vgpr13, implicit $exec
   ; RegAlloc-NEXT:   renamable $sgpr15 = S_CSELECT_B32 1, 0, implicit $scc
   ; RegAlloc-NEXT:   S_CMP_EQ_U64 killed renamable $sgpr26_sgpr27, killed renamable $sgpr22_sgpr23, implicit-def $scc
   ; RegAlloc-NEXT:   renamable $sgpr16 = S_CSELECT_B32 1, 0, implicit $scc
@@ -62,12 +62,12 @@ body: |
   ; RegAlloc-NEXT:   renamable $sgpr14 = S_AND_B32 killed renamable $sgpr14, killed renamable $sgpr15, implicit-def dead $scc
   ; RegAlloc-NEXT:   renamable $sgpr14 = S_AND_B32 killed renamable $sgpr16, killed renamable $sgpr14, implicit-def dead $scc
   ; RegAlloc-NEXT:   renamable $sgpr14 = S_AND_B32 killed renamable $sgpr17, killed renamable $sgpr14, implicit-def dead $scc
-  ; RegAlloc-NEXT:   renamable $sgpr16_sgpr17 = V_CMP_NE_U32_e64 $sgpr7, killed $vgpr16, implicit $exec
+  ; RegAlloc-NEXT:   renamable $sgpr16_sgpr17 = V_CMP_NE_U32_e64 $sgpr7, killed $vgpr18, implicit $exec
   ; RegAlloc-NEXT:   renamable $sgpr14 = S_AND_B32 killed renamable $sgpr14, 1, implicit-def dead $scc
   ; RegAlloc-NEXT:   $scc = COPY killed renamable $sgpr14
   ; RegAlloc-NEXT:   renamable $sgpr14 = S_CSELECT_B32 1, 0, implicit $scc
   ; RegAlloc-NEXT:   renamable $sgpr12_sgpr13 = S_XOR_B64 killed renamable $sgpr16_sgpr17, killed renamable $sgpr12_sgpr13, implicit-def dead $scc
-  ; RegAlloc-NEXT:   renamable $vgpr3 = IMPLICIT_DEF
+  ; RegAlloc-NEXT:   renamable $vgpr15 = IMPLICIT_DEF
   ; RegAlloc-NEXT:   renamable $sgpr16_sgpr17 = COPY $exec, implicit-def $exec
   ; RegAlloc-NEXT:   renamable $sgpr18_sgpr19 = S_AND_B64 renamable $sgpr16_sgpr17, killed renamable $sgpr12_sgpr13, implicit-def dead $scc
   ; RegAlloc-NEXT:   renamable $sgpr12_sgpr13 = S_XOR_B64 renamable $sgpr18_sgpr19, killed renamable $sgpr16_sgpr17, implicit-def dead $scc
@@ -77,7 +77,7 @@ body: |
   ; RegAlloc-NEXT: {{  $}}
   ; RegAlloc-NEXT: bb.1:
   ; RegAlloc-NEXT:   successors: %bb.3(0x40000000), %bb.5(0x40000000)
-  ; RegAlloc-NEXT:   liveins: $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr14, $vgpr0, $vgpr3, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr1_vgpr2:0x0000000000000003, $vgpr8_vgpr9_vgpr10_vgpr11:0x00000000000000FF, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7:0x0000000000003F00
+  ; RegAlloc-NEXT:   liveins: $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr14, $vgpr0, $vgpr15, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr13_vgpr14:0x0000000000000003, $vgpr9_vgpr10_vgpr11_vgpr12:0x00000000000000FF, $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8:0x0000000000003F00
   ; RegAlloc-NEXT: {{  $}}
   ; RegAlloc-NEXT:   renamable $sgpr12_sgpr13 = S_OR_SAVEEXEC_B64 killed renamable $sgpr12_sgpr13, implicit-def $exec, implicit-def $scc, implicit $exec
   ; RegAlloc-NEXT:   $exec = S_XOR_B64_term $exec, renamable $sgpr12_sgpr13, implicit-def $scc
@@ -89,41 +89,41 @@ body: |
   ; RegAlloc-NEXT:   liveins: $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr14, $vgpr0, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr0_sgpr1_sgpr2_sgpr3
   ; RegAlloc-NEXT: {{  $}}
   ; RegAlloc-NEXT:   renamable $sgpr15 = S_OR_B32 renamable $sgpr14, 2, implicit-def dead $scc
-  ; RegAlloc-NEXT:   renamable $vgpr3 = COPY killed renamable $sgpr15
-  ; RegAlloc-NEXT:   renamable $vgpr8_vgpr9 = IMPLICIT_DEF
-  ; RegAlloc-NEXT:   renamable $vgpr1_vgpr2 = IMPLICIT_DEF
-  ; RegAlloc-NEXT:   renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF
+  ; RegAlloc-NEXT:   renamable $vgpr15 = COPY killed renamable $sgpr15
+  ; RegAlloc-NEXT:   renamable $vgpr9_vgpr10 = IMPLICIT_DEF
+  ; RegAlloc-NEXT:   renamable $vgpr13_vgpr14 = IMPLICIT_DEF
+  ; RegAlloc-NEXT:   renamable $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 = IMPLICIT_DEF
   ; RegAlloc-NEXT:   S_BRANCH %bb.1
   ; RegAlloc-NEXT: {{  $}}
   ; RegAlloc-NEXT: bb.3:
   ; RegAlloc-NEXT:   successors: %bb.5(0x80000000)
-  ; RegAlloc-NEXT:   liveins: $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr14, $vgpr0, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr1_vgpr2:0x0000000000000003, $vgpr8_vgpr9_vgpr10_vgpr11:0x00000000000000FF, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7:0x0000000000003F00
+  ; RegAlloc-NEXT:   liveins: $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr14, $vgpr0, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr13_vgpr14:0x0000000000000003, $vgpr9_vgpr10_vgpr11_vgpr12:0x00000000000000FF, $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8:0x0000000000003F00
   ; RegAlloc-NEXT: {{  $}}
   ; RegAlloc-NEXT:   renamable $sgpr7 = S_LSHL_B32 killed renamable $sgpr7, 5, implicit-def dead $scc
-  ; RegAlloc-NEXT:   renamable $vgpr2 = COPY renamable $sgpr7
+  ; RegAlloc-NEXT:   renamable $vgpr1 = COPY renamable $sgpr7
   ; RegAlloc-NEXT:   renamable $sgpr7 = S_OR_B32 killed renamable $sgpr7, 16, implicit-def dead $scc
-  ; RegAlloc-NEXT:   renamable $sgpr16 = V_READFIRSTLANE_B32 $vgpr8, implicit $exec
-  ; RegAlloc-NEXT:   renamable $vgpr3 = COPY killed renamable $sgpr7
-  ; RegAlloc-NEXT:   renamable $vgpr12_vgpr13_vgpr14_vgpr15 = BUFFER_LOAD_DWORDX4_OFFEN killed renamable $vgpr2, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
-  ; RegAlloc-NEXT:   renamable $sgpr17 = V_READFIRSTLANE_B32 $vgpr9, implicit $exec
-  ; RegAlloc-NEXT:   renamable $sgpr18 = V_READFIRSTLANE_B32 $vgpr10, implicit $exec
-  ; RegAlloc-NEXT:   renamable $sgpr19 = V_READFIRSTLANE_B32 killed $vgpr11, implicit $exec
-  ; RegAlloc-NEXT:   renamable $vgpr7_vgpr8_vgpr9_vgpr10 = BUFFER_LOAD_DWORDX4_OFFEN killed renamable $vgpr3, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
-  ; RegAlloc-NEXT:   renamable $sgpr0 = V_READFIRSTLANE_B32 $vgpr4, implicit $exec
-  ; RegAlloc-NEXT:   renamable $sgpr1 = V_READFIRSTLANE_B32 $vgpr5, implicit $exec
-  ; RegAlloc-NEXT:   renamable $sgpr2 = V_READFIRSTLANE_B32 killed $vgpr6, implicit $exec
-  ; RegAlloc-NEXT:   renamable $sgpr3 = V_READFIRSTLANE_B32 killed $vgpr1, implicit $exec
-  ; RegAlloc-NEXT:   renamable $sgpr20 = V_READFIRSTLANE_B32 $vgpr12, implicit $exec
-  ; RegAlloc-NEXT:   renamable $sgpr21 = V_READFIRSTLANE_B32 $vgpr13, implicit $exec
-  ; RegAlloc-NEXT:   renamable $sgpr22 = V_READFIRSTLANE_B32 $vgpr14, implicit $exec
-  ; RegAlloc-NEXT:   renamable $sgpr23 = V_READFIRSTLANE_B32 killed $vgpr15, implicit $exec
-  ; RegAlloc-NEXT:   renamable $sgpr24 = V_READFIRSTLANE_B32 $vgpr7, implicit $exec
-  ; RegAlloc-NEXT:   renamable $sgpr25 = V_READFIRSTLANE_B32 $vgpr8, implicit $exec
-  ; RegAlloc-NEXT:   S_CMP_EQ_U64 killed renamable $sgpr20_sgpr21, killed renamable $sgpr16_sgpr17, implicit-def $scc
   ; RegAlloc-NEXT:   renamable $sgpr16 = V_READFIRSTLANE_B32 $vgpr9, implicit $exec
+  ; RegAlloc-NEXT:   renamable $vgpr8 = COPY killed renamable $sgpr7
+  ; RegAlloc-NEXT:   renamable $vgpr1_vgpr2_vgpr3_vgpr4 = BUFFER_LOAD_DWORDX4_OFFEN killed renamable $vgpr1, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
+  ; RegAlloc-NEXT:   renamable $sgpr17 = V_READFIRSTLANE_B32 $vgpr10, implicit $exec
+  ; RegAlloc-NEXT:   renamable $sgpr18 = V_READFIRSTLANE_B32 $vgpr11, implicit $exec
+  ; RegAlloc-NEXT:   renamable $sgpr19 = V_READFIRSTLANE_B32 killed $vgpr12, implicit $exec
+  ; RegAlloc-NEXT:   renamable $vgpr8_vgpr9_vgpr10_vgpr11 = BUFFER_LOAD_DWORDX4_OFFEN killed renamable $vgpr8, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
+  ; RegAlloc-NEXT:   renamable $sgpr0 = V_READFIRSTLANE_B32 $vgpr5, implicit $exec
+  ; RegAlloc-NEXT:   renamable $sgpr1 = V_READFIRSTLANE_B32 $vgpr6, implicit $exec
+  ; RegAlloc-NEXT:   renamable $sgpr2 = V_READFIRSTLANE_B32 killed $vgpr7, implicit $exec
+  ; RegAlloc-NEXT:   renamable $sgpr3 = V_READFIRSTLANE_B32 killed $vgpr13, implicit $exec
+  ; RegAlloc-NEXT:   renamable $sgpr20 = V_READFIRSTLANE_B32 $vgpr1, implicit $exec
+  ; RegAlloc-NEXT:   renamable $sgpr21 = V_READFIRSTLANE_B32 $vgpr2, implicit $exec
+  ; RegAlloc-NEXT:   renamable $sgpr22 = V_READFIRSTLANE_B32 $vgpr3, implicit $exec
+  ; RegAlloc-NEXT:   renamable $sgpr23 = V_READFIRSTLANE_B32 killed $vgpr4, implicit $exec
+  ; RegAlloc-NEXT:   renamable $sgpr24 = V_READFIRSTLANE_B32 $vgpr8, implicit $exec
+  ; RegAlloc-NEXT:   renamable $sgpr25 = V_READFIRSTLANE_B32 $vgpr9, implicit $exec
+  ; RegAlloc-NEXT:   S_CMP_EQ_U64 killed renamable $sgpr20_sgpr21, killed renamable $sgpr16_sgpr17, implicit-def $scc
+  ; RegAlloc-NEXT:   renamable $sgpr16 = V_READFIRSTLANE_B32 $vgpr10, implicit $exec
   ; RegAlloc-NEXT:   renamable $sgpr7 = S_CSELECT_B32 1, 0, implicit $scc
   ; RegAlloc-NEXT:   S_CMP_EQ_U64 killed renamable $sgpr22_sgpr23, killed renamable $sgpr18_sgpr19, implicit-def $scc
-  ; RegAlloc-NEXT:   renamable $sgpr17 = V_READFIRSTLANE_B32 killed $vgpr10, implicit $exec
+  ; RegAlloc-NEXT:   renamable $sgpr17 = V_READFIRSTLANE_B32 killed $vgpr11, implicit $exec
   ; RegAlloc-NEXT:   renamable $sgpr15 = S_CSELECT_B32 1, 0, implicit $scc
   ; RegAlloc-NEXT:   S_CMP_EQ_U64 killed renamable $sgpr24_sgpr25, killed renamable $sgpr0_sgpr1, implicit-def $scc
   ; RegAlloc-NEXT:   renamable $sgpr0 = S_CSELECT_B32 1, 0, implicit $scc
@@ -137,11 +137,11 @@ body: |
   ; RegAlloc-NEXT:   renamable $sgpr0 = S_CSELECT_B32 1, 0, implicit $scc
   ; RegAlloc-NEXT:   renamable $sgpr0 = S_LSHL_B32 killed renamable $sgpr0, 1, implicit-def dead $scc
   ; RegAlloc-NEXT:   renamable $sgpr0 = S_OR_B32 killed renamable $sgpr0, killed renamable $sgpr14, implicit-def dead $scc
-  ; RegAlloc-NEXT:   renamable $vgpr3 = COPY killed renamable $sgpr0
+  ; RegAlloc-NEXT:   renamable $vgpr15 = COPY killed renamable $sgpr0
   ; RegAlloc-NEXT:   S_BRANCH %bb.5
   ; RegAlloc-NEXT: {{  $}}
   ; RegAlloc-NEXT: bb.4:
-  ; RegAlloc-NEXT:   liveins: $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr3, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11
+  ; RegAlloc-NEXT:   liveins: $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr15, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11
   ; RegAlloc-NEXT: {{  $}}
   ; RegAlloc-NEXT:   renamable $sgpr8_sgpr9 = S_LOAD_DWORDX2_IMM killed renamable $sgpr8_sgpr9, 0, 0 :: (invariant load (<2 x s32>), align 16, addrspace 4)
   ; RegAlloc-NEXT:   renamable $vgpr1 = COPY killed renamable $sgpr5
@@ -152,12 +152,12 @@ body: |
   ; RegAlloc-NEXT:   renamable $sgpr4 = S_LSHL_B32 killed renamable $sgpr4, 6, implicit-def dead $scc
   ; RegAlloc-NEXT:   renamable $vgpr0 = V_ADD_U32_e64 killed $sgpr4, killed $vgpr0, 0, implicit $exec
   ; RegAlloc-NEXT:   renamable $vgpr0 = V_ADD_LSHL_U32_e64 killed $vgpr1, killed $vgpr0, 2, implicit $exec
-  ; RegAlloc-NEXT:   BUFFER_STORE_DWORD_OFFEN_exact killed renamable $vgpr3, killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
+  ; RegAlloc-NEXT:   BUFFER_STORE_DWORD_OFFEN_exact killed renamable $vgpr15, killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
   ; RegAlloc-NEXT:   S_ENDPGM 0
   ; RegAlloc-NEXT: {{  $}}
   ; RegAlloc-NEXT: bb.5:
   ; RegAlloc-NEXT:   successors: %bb.4(0x80000000)
-  ; RegAlloc-NEXT:   liveins: $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr3, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13
+  ; RegAlloc-NEXT:   liveins: $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr15, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13
   ; RegAlloc-NEXT: {{  $}}
   ; RegAlloc-NEXT:   $exec = S_OR_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def $scc
   ; RegAlloc-NEXT:   S_BRANCH %bb.4
@@ -182,34 +182,34 @@ body: |
   ; DeadInstDelete-NEXT:   renamable $vgpr1 = COPY renamable $sgpr14
   ; DeadInstDelete-NEXT:   renamable $vgpr1_vgpr2_vgpr3_vgpr4 = BUFFER_LOAD_DWORDX4_OFFEN killed renamable $vgpr1, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
   ; DeadInstDelete-NEXT:   renamable $vgpr5 = V_MBCNT_LO_U32_B32_e64 -1, 0, implicit $exec
-  ; DeadInstDelete-NEXT:   renamable $vgpr16 = V_MBCNT_HI_U32_B32_e64 -1, killed $vgpr5, implicit $exec
+  ; DeadInstDelete-NEXT:   renamable $vgpr18 = V_MBCNT_HI_U32_B32_e64 -1, killed $vgpr5, implicit $exec
   ; DeadInstDelete-NEXT:   renamable $sgpr14 = S_OR_B32 killed renamable $sgpr14, 16, implicit-def dead $scc
-  ; DeadInstDelete-NEXT:   renamable $vgpr5 = V_LSHLREV_B32_e64 5, $vgpr16, implicit $exec
+  ; DeadInstDelete-NEXT:   renamable $vgpr5 = V_LSHLREV_B32_e64 5, $vgpr18, implicit $exec
   ; DeadInstDelete-NEXT:   renamable $vgpr6 = COPY killed renamable $sgpr14
   ; DeadInstDelete-NEXT:   renamable $vgpr7 = V_OR_B32_e64 16, $vgpr5, implicit $exec
-  ; DeadInstDelete-NEXT:   renamable $vgpr12_vgpr13_vgpr14_vgpr15 = BUFFER_LOAD_DWORDX4_OFFEN killed renamable $vgpr6, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
+  ; DeadInstDelete-NEXT:   renamable $vgpr14_vgpr15_vgpr16_vgpr17 = BUFFER_LOAD_DWORDX4_OFFEN killed renamable $vgpr6, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
   ; DeadInstDelete-NEXT:   renamable $sgpr14 = V_READFIRSTLANE_B32 $vgpr1, implicit $exec
-  ; DeadInstDelete-NEXT:   renamable $vgpr8_vgpr9_vgpr10_vgpr11 = BUFFER_LOAD_DWORDX4_OFFEN killed renamable $vgpr5, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
+  ; DeadInstDelete-NEXT:   renamable $vgpr9_vgpr10_vgpr11_vgpr12 = BUFFER_LOAD_DWORDX4_OFFEN killed renamable $vgpr5, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
   ; DeadInstDelete-NEXT:   renamable $sgpr15 = V_READFIRSTLANE_B32 $vgpr2, implicit $exec
   ; DeadInstDelete-NEXT:   renamable $sgpr16 = V_READFIRSTLANE_B32 $vgpr3, implicit $exec
   ; DeadInstDelete-NEXT:   renamable $sgpr17 = V_READFIRSTLANE_B32 killed $vgpr4, implicit $exec
-  ; DeadInstDelete-NEXT:   renamable $vgpr4_vgpr5_vgpr6_vgpr7 = BUFFER_LOAD_DWORDX4_OFFEN killed renamable $vgpr7, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
-  ; DeadInstDelete-NEXT:   renamable $sgpr18 = V_READFIRSTLANE_B32 $vgpr8, implicit $exec
-  ; DeadInstDelete-NEXT:   renamable $sgpr19 = V_READFIRSTLANE_B32 $vgpr9, implicit $exec
-  ; DeadInstDelete-NEXT:   renamable $sgpr20 = V_READFIRSTLANE_B32 $vgpr10, implicit $exec
-  ; DeadInstDelete-NEXT:   renamable $sgpr21 = V_READFIRSTLANE_B32 $vgpr11, implicit $exec
-  ; DeadInstDelete-NEXT:   renamable $sgpr22 = V_READFIRSTLANE_B32 $vgpr4, implicit $exec
-  ; DeadInstDelete-NEXT:   renamable $sgpr23 = V_READFIRSTLANE_B32 $vgpr5, implicit $exec
-  ; DeadInstDelete-NEXT:   renamable $sgpr24 = V_READFIRSTLANE_B32 $vgpr6, implicit $exec
-  ; DeadInstDelete-NEXT:   renamable $vgpr1 = COPY renamable $vgpr7
-  ; DeadInstDelete-NEXT:   renamable $sgpr26 = V_READFIRSTLANE_B32 $vgpr12, implicit $exec
-  ; DeadInstDelete-NEXT:   renamable $sgpr27 = V_READFIRSTLANE_B32 $vgpr13, implicit $exec
-  ; DeadInstDelete-NEXT:   renamable $sgpr28 = V_READFIRSTLANE_B32 $vgpr14, implicit $exec
+  ; DeadInstDelete-NEXT:   renamable $vgpr5_vgpr6_vgpr7_vgpr8 = BUFFER_LOAD_DWORDX4_OFFEN killed renamable $vgpr7, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
+  ; DeadInstDelete-NEXT:   renamable $sgpr18 = V_READFIRSTLANE_B32 $vgpr9, implicit $exec
+  ; DeadInstDelete-NEXT:   renamable $sgpr19 = V_READFIRSTLANE_B32 $vgpr10, implicit $exec
+  ; DeadInstDelete-NEXT:   renamable $sgpr20 = V_READFIRSTLANE_B32 $vgpr11, implicit $exec
+  ; DeadInstDelete-NEXT:   renamable $sgpr21 = V_READFIRSTLANE_B32 $vgpr12, implicit $exec
+  ; DeadInstDelete-NEXT:   renamable $sgpr22 = V_READFIRSTLANE_B32 $vgpr5, implicit $exec
+  ; DeadInstDelete-NEXT:   renamable $sgpr23 = V_READFIRSTLANE_B32 $vgpr6, implicit $exec
+  ; DeadInstDelete-NEXT:   renamable $sgpr24 = V_READFIRSTLANE_B32 $vgpr7, implicit $exec
+  ; DeadInstDelete-NEXT:   renamable $vgpr13 = COPY renamable $vgpr8
+  ; DeadInstDelete-NEXT:   renamable $sgpr26 = V_READFIRSTLANE_B32 $vgpr14, implicit $exec
+  ; DeadInstDelete-NEXT:   renamable $sgpr27 = V_READFIRSTLANE_B32 $vgpr15, implicit $exec
+  ; DeadInstDelete-NEXT:   renamable $sgpr28 = V_READFIRSTLANE_B32 $vgpr16, implicit $exec
   ; DeadInstDelete-NEXT:   S_CMP_EQ_U64 killed renamable $sgpr14_sgpr15, killed renamable $sgpr18_sgpr19, implicit-def $scc
-  ; DeadInstDelete-NEXT:   renamable $sgpr29 = V_READFIRSTLANE_B32 killed $vgpr15, implicit $exec
+  ; DeadInstDelete-NEXT:   renamable $sgpr29 = V_READFIRSTLANE_B32 killed $vgpr17, implicit $exec
   ; DeadInstDelete-NEXT:   renamable $sgpr14 = S_CSELECT_B32 1, 0, implicit $scc
   ; DeadInstDelete-NEXT:   S_CMP_EQ_U64 killed renamable $sgpr16_sgpr17, killed renamable $sgpr20_sgpr21, implicit-def $scc
-  ; DeadInstDelete-NEXT:   renamable $sgpr25 = V_READFIRSTLANE_B32 $vgpr1, implicit $exec
+  ; DeadInstDelete-NEXT:   renamable $sgpr25 = V_READFIRSTLANE_B32 $vgpr13, implicit $exec
   ; DeadInstDelete-NEXT:   renamable $sgpr15 = S_CSELECT_B32 1, 0, implicit $scc
   ; DeadInstDelete-NEXT:   S_CMP_EQ_U64 killed renamable $sgpr26_sgpr27, killed renamable $sgpr22_sgpr23, implicit-def $scc
   ; DeadInstDelete-NEXT:   renamable $sgpr16 = S_CSELECT_B32 1, 0, implicit $scc
@@ -218,12 +218,12 @@ body: |
   ; DeadInstDelete-NEXT:   renamable $sgpr14 = S_AND_B32 killed renamable $sgpr14, killed renamable $sgpr15, implicit-def dead $scc
   ; DeadInstDelete-NEXT:   renamable $sgpr14 = S_AND_B32 killed renamable $sgpr16, killed renamable $sgpr14, implicit-def dead $scc
   ; DeadInstDelete-NEXT:   renamable $sgpr14 = S_AND_B32 killed renamable $sgpr17, killed renamable $sgpr14, implicit-def dead $scc
-  ; DeadInstDelete-NEXT:   renamable $sgpr16_sgpr17 = V_CMP_NE_U32_e64 $sgpr7, killed $vgpr16, implicit $exec
+  ; DeadInstDelete-NEXT:   renamable $sgpr16_sgpr17 = V_CMP_NE_U32_e64 $sgpr7, killed $vgpr18, implicit $exec
   ; DeadInstDelete-NEXT:   renamable $sgpr14 = S_AND_B32 killed renamable $sgpr14, 1, implicit-def dead $scc
   ; DeadInstDelete-NEXT:   $scc = COPY killed renamable $sgpr14
   ; DeadInstDelete-NEXT:   renamable $sgpr14 = S_CSELECT_B32 1, 0, implicit $scc
   ; DeadInstDelete-NEXT:   renamable $sgpr12_sgpr13 = S_XOR_B64 killed renamable $sgpr16_sgpr17, killed renamable $sgpr12_sgpr13, implicit-def dead $scc
-  ; DeadInstDelete-NEXT:   renamable $vgpr3 = IMPLICIT_DEF
+  ; DeadInstDelete-NEXT:   renamable $vgpr15 = IMPLICIT_DEF
   ; DeadInstDelete-NEXT:   renamable $sgpr16_sgpr17 = COPY $exec, implicit-def $exec
   ; DeadInstDelete-NEXT:   renamable $sgpr18_sgpr19 = S_AND_B64 renamable $sgpr16_sgpr17, killed renamable $sgpr12_sgpr13, implicit-def dead $scc
   ; DeadInstDelete-NEXT:   renamable $sgpr12_sgpr13 = S_XOR_B64 renamable $sgpr18_sgpr19, killed renamable $sgpr16_sgpr17, implicit-def dead $scc
@@ -233,7 +233,7 @@ body: |
   ; DeadInstDelete-NEXT: {{  $}}
   ; DeadInstDelete-NEXT: bb.1:
   ; DeadInstDelete-NEXT:   successors: %bb.3(0x40000000), %bb.5(0x40000000)
-  ; DeadInstDelete-NEXT:   liveins: $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr14, $vgpr0, $vgpr3, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr1_vgpr2:0x0000000000000003, $vgpr8_vgpr9_vgpr10_vgpr11:0x00000000000000FF, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7:0x0000000000003F00
+  ; DeadInstDelete-NEXT:   liveins: $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr14, $vgpr0, $vgpr15, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr13_vgpr14:0x0000000000000003, $vgpr9_vgpr10_vgpr11_vgpr12:0x00000000000000FF, $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8:0x0000000000003F00
   ; DeadInstDelete-NEXT: {{  $}}
   ; DeadInstDelete-NEXT:   renamable $sgpr12_sgpr13 = S_OR_SAVEEXEC_B64 killed renamable $sgpr12_sgpr13, implicit-def $exec, implicit-def $scc, implicit $exec
   ; DeadInstDelete-NEXT:   $exec = S_XOR_B64_term $exec, renamable $sgpr12_sgpr13, implicit-def $scc
@@ -245,40 +245,41 @@ body: |
   ; DeadInstDelete-NEXT:   liveins: $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr14, $vgpr0, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr0_sgpr1_sgpr2_sgpr3
   ; DeadInstDelete-NEXT: {{  $}}
   ; DeadInstDelete-NEXT:   renamable $sgpr15 = S_OR_B32 renamable $sgpr14, 2, implicit-def dead $scc
-  ; DeadInstDelete-NEXT:   renamable $vgpr8_vgpr9 = IMPLICIT_DEF
-  ; DeadInstDelete-NEXT:   renamable $vgpr1_vgpr2 = IMPLICIT_DEF
-  ; DeadInstDelete-NEXT:   renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF
+  ; DeadInstDelete-NEXT:   renamable $vgpr15 = COPY killed renamable $sgpr15
+  ; DeadInstDelete-NEXT:   renamable $vgpr9_vgpr10 = IMPLICIT_DEF
+  ; DeadInstDelete-NEXT:   renamable $vgpr13_vgpr14 = IMPLICIT_DEF
+  ; DeadInstDelete-NEXT:   renamable $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 = IMPLICIT_DEF
   ; DeadInstDelete-NEXT:   S_BRANCH %bb.1
   ; DeadInstDelete-NEXT: {{  $}}
   ; DeadInstDelete-NEXT: bb.3:
   ; DeadInstDelete-NEXT:   successors: %bb.5(0x80000000)
-  ; DeadInstDelete-NEXT:   liveins: $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr14, $vgpr0, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr1_vgpr2:0x0000000000000003, $vgpr8_vgpr9_vgpr10_vgpr11:0x00000000000000FF, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7:0x0000000000003F00
+  ; DeadInstDelete-NEXT:   liveins: $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr14, $vgpr0, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr13_vgpr14:0x0000000000000003, $vgpr9_vgpr10_vgpr11_vgpr12:0x00000000000000FF, $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8:0x0000000000003F00
   ; DeadInstDelete-NEXT: {{  $}}
   ; DeadInstDelete-NEXT:   renamable $sgpr7 = S_LSHL_B32 killed renamable $sgpr7, 5, implicit-def dead $scc
-  ; DeadInstDelete-NEXT:   renamable $vgpr2 = COPY renamable $sgpr7
+  ; DeadInstDelete-NEXT:   renamable $vgpr1 = COPY renamable $sgpr7
   ; DeadInstDelete-NEXT:   renamable $sgpr7 = S_OR_B32 killed renamable $sgpr7, 16, implicit-def dead $scc
-  ; DeadInstDelete-NEXT:   renamable $sgpr16 = V_READFIRSTLANE_B32 $vgpr8, implicit $exec
-  ; DeadInstDelete-NEXT:   renamable $vgpr3 = COPY killed renamable $sgpr7
-  ; DeadInstDelete-NEXT:   renamable $vgpr12_vgpr13_vgpr14_vgpr15 = BUFFER_LOAD_DWORDX4_OFFEN killed renamable $vgpr2, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
-  ; DeadInstDelete-NEXT:   renamable $sgpr17 = V_READFIRSTLANE_B32 $vgpr9, implicit $exec
-  ; DeadInstDelete-NEXT:   renamable $sgpr18 = V_READFIRSTLANE_B32 $vgpr10, implicit $exec
-  ; DeadInstDelete-NEXT:   renamable $sgpr19 = V_READFIRSTLANE_B32 killed $vgpr11, implicit $exec
-  ; DeadInstDelete-NEXT:   renamable $vgpr7_vgpr8_vgpr9_vgpr10 = BUFFER_LOAD_DWORDX4_OFFEN killed renamable $vgpr3, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
-  ; DeadInstDelete-NEXT:   renamable $sgpr0 = V_READFIRSTLANE_B32 $vgpr4, implicit $exec
-  ; DeadInstDelete-NEXT:   renamable $sgpr1 = V_READFIRSTLANE_B32 $vgpr5, implicit $exec
-  ; DeadInstDelete-NEXT:   renamable $sgpr2 = V_READFIRSTLANE_B32 killed $vgpr6, implicit $exec
-  ; DeadInstDelete-NEXT:   renamable $sgpr3 = V_READFIRSTLANE_B32 killed $vgpr1, implicit $exec
-  ; DeadInstDelete-NEXT:   renamable $sgpr20 = V_READFIRSTLANE_B32 $vgpr12, implicit $exec
-  ; DeadInstDelete-NEXT:   renamable $sgpr21 = V_READFIRSTLANE_B32 $vgpr13, implicit $exec
-  ; DeadInstDelete-NEXT:   renamable $sgpr22 = V_READFIRSTLANE_B32 $vgpr14, implicit $exec
-  ; DeadInstDelete-NEXT:   renamable $sgpr23 = V_READFIRSTLANE_B32 killed $vgpr15, implicit $exec
-  ; DeadInstDelete-NEXT:   renamable $sgpr24 = V_READFIRSTLANE_B32 $vgpr7, implicit $exec
-  ; DeadInstDelete-NEXT:   renamable $sgpr25 = V_READFIRSTLANE_B32 $vgpr8, implicit $exec
-  ; DeadInstDelete-NEXT:   S_CMP_EQ_U64 killed renamable $sgpr20_sgpr21, killed renamable $sgpr16_sgpr17, implicit-def $scc
   ; DeadInstDelete-NEXT:   renamable $sgpr16 = V_READFIRSTLANE_B32 $vgpr9, implicit $exec
+  ; DeadInstDelete-NEXT:   renamable $vgpr8 = COPY killed renamable $sgpr7
+  ; DeadInstDelete-NEXT:   renamable $vgpr1_vgpr2_vgpr3_vgpr4 = BUFFER_LOAD_DWORDX4_OFFEN killed renamable $vgpr1, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
+  ; DeadInstDelete-NEXT:   renamable $sgpr17 = V_READFIRSTLANE_B32 $vgpr10, implicit $exec
+  ; DeadInstDelete-NEXT:   renamable $sgpr18 = V_READFIRSTLANE_B32 $vgpr11, implicit $exec
+  ; DeadInstDelete-NEXT:   renamable $sgpr19 = V_READFIRSTLANE_B32 killed $vgpr12, implicit $exec
+  ; DeadInstDelete-NEXT:   renamable $vgpr8_vgpr9_vgpr10_vgpr11 = BUFFER_LOAD_DWORDX4_OFFEN killed renamable $vgpr8, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
+  ; DeadInstDelete-NEXT:   renamable $sgpr0 = V_READFIRSTLANE_B32 $vgpr5, implicit $exec
+  ; DeadInstDelete-NEXT:   renamable $sgpr1 = V_READFIRSTLANE_B32 $vgpr6, implicit $exec
+  ; DeadInstDelete-NEXT:   renamable $sgpr2 = V_READFIRSTLANE_B32 killed $vgpr7, implicit $exec
+  ; DeadInstDelete-NEXT:   renamable $sgpr3 = V_READFIRSTLANE_B32 killed $vgpr13, implicit $exec
+  ; DeadInstDelete-NEXT:   renamable $sgpr20 = V_READFIRSTLANE_B32 $vgpr1, implicit $exec
+  ; DeadInstDelete-NEXT:   renamable $sgpr21 = V_READFIRSTLANE_B32 $vgpr2, implicit $exec
+  ; DeadInstDelete-NEXT:   renamable $sgpr22 = V_READFIRSTLANE_B32 $vgpr3, implicit $exec
+  ; DeadInstDelete-NEXT:   renamable $sgpr23 = V_READFIRSTLANE_B32 killed $vgpr4, implicit $exec
+  ; DeadInstDelete-NEXT:   renamable $sgpr24 = V_READFIRSTLANE_B32 $vgpr8, implicit $exec
+  ; DeadInstDelete-NEXT:   renamable $sgpr25 = V_READFIRSTLANE_B32 $vgpr9, implicit $exec
+  ; DeadInstDelete-NEXT:   S_CMP_EQ_U64 killed renamable $sgpr20_sgpr21, killed renamable $sgpr16_sgpr17, implicit-def $scc
+  ; DeadInstDelete-NEXT:   renamable $sgpr16 = V_READFIRSTLANE_B32 $vgpr10, implicit $exec
   ; DeadInstDelete-NEXT:   renamable $sgpr7 = S_CSELECT_B32 1, 0, implicit $scc
   ; DeadInstDelete-NEXT:   S_CMP_EQ_U64 killed renamable $sgpr22_sgpr23, killed renamable $sgpr18_sgpr19, implicit-def $scc
-  ; DeadInstDelete-NEXT:   renamable $sgpr17 = V_READFIRSTLANE_B32 killed $vgpr10, implicit $exec
+  ; DeadInstDelete-NEXT:   renamable $sgpr17 = V_READFIRSTLANE_B32 killed $vgpr11, implicit $exec
   ; DeadInstDelete-NEXT:   renamable $sgpr15 = S_CSELECT_B32 1, 0, implicit $scc
   ; DeadInstDelete-NEXT:   S_CMP_EQ_U64 killed renamable $sgpr24_sgpr25, killed renamable $sgpr0_sgpr1, implicit-def $scc
   ; DeadInstDelete-NEXT:   renamable $sgpr0 = S_CSELECT_B32 1, 0, implicit $scc
@@ -292,11 +293,11 @@ body: |
   ; DeadInstDelete-NEXT:   renamable $sgpr0 = S_CSELECT_B32 1, 0, implicit $scc
   ; DeadInstDelete-NEXT:   renamable $sgpr0 = S_LSHL_B32 killed renamable $sgpr0, 1, implicit-def dead $scc
   ; DeadInstDelete-NEXT:   renamable $sgpr0 = S_OR_B32 killed renamable $sgpr0, killed renamable $sgpr14, implicit-def dead $scc
-  ; DeadInstDelete-NEXT:   renamable $vgpr3 = COPY killed renamable $sgpr0
+  ; DeadInstDelete-NEXT:   renamable $vgpr15 = COPY killed renamable $sgpr0
   ; DeadInstDelete-NEXT:   S_BRANCH %bb.5
   ; DeadInstDelete-NEXT: {{  $}}
   ; DeadInstDelete-NEXT: bb.4:
-  ; DeadInstDelete-NEXT:   liveins: $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr3, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11
+  ; DeadInstDelete-NEXT:   liveins: $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr15, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11
   ; DeadInstDelete-NEXT: {{  $}}
   ; DeadInstDelete-NEXT:   renamable $sgpr8_sgpr9 = S_LOAD_DWORDX2_IMM killed renamable $sgpr8_sgpr9, 0, 0 :: (invariant load (<2 x s32>), align 16, addrspace 4)
   ; DeadInstDelete-NEXT:   renamable $vgpr1 = COPY killed renamable $sgpr5
@@ -307,12 +308,12 @@ body: |
   ; DeadInstDelete-NEXT:   renamable $sgpr4 = S_LSHL_B32 killed renamable $sgpr4, 6, implicit-def dead $scc
   ; DeadInstDelete-NEXT:   renamable $vgpr0 = V_ADD_U32_e64 killed $sgpr4, killed $vgpr0, 0, implicit $exec
   ; DeadInstDelete-NEXT:   renamable $vgpr0 = V_ADD_LSHL_U32_e64 killed $vgpr1, killed $vgpr0, 2, implicit $exec
-  ; DeadInstDelete-NEXT:   BUFFER_STORE_DWORD_OFFEN_exact killed renamable $vgpr3, killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
+  ; DeadInstDelete-NEXT:   BUFFER_STORE_DWORD_OFFEN_exact killed renamable $vgpr15, killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
   ; DeadInstDelete-NEXT:   S_ENDPGM 0
   ; DeadInstDelete-NEXT: {{  $}}
   ; DeadInstDelete-NEXT: bb.5:
   ; DeadInstDelete-NEXT:   successors: %bb.4(0x80000000)
-  ; DeadInstDelete-NEXT:   liveins: $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr3, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13
+  ; DeadInstDelete-NEXT:   liveins: $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr15, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13
   ; DeadInstDelete-NEXT: {{  $}}
   ; DeadInstDelete-NEXT:   $exec = S_OR_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def $scc
   ; DeadInstDelete-NEXT:   S_BRANCH %bb.4



More information about the llvm-commits mailing list