[llvm] RenameIndependentSubregs: Add missing sub-range for new IMPLICIT_DEFs (PR #89050)
Petar Avramovic via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 23 01:46:12 PDT 2024
https://github.com/petar-avramovic updated https://github.com/llvm/llvm-project/pull/89050
>From 6030f75e6347fc5b2f5f59f47a8b3712ca071f62 Mon Sep 17 00:00:00 2001
From: Petar Avramovic <Petar.Avramovic at amd.com>
Date: Tue, 23 Apr 2024 10:31:50 +0200
Subject: [PATCH 1/2] AMDGPU: precommit test for bug in
RenameIndependentSubregs
---
...rval-bug-in-rename-independent-subregs.mir | 207 ++++++++++++++++++
1 file changed, 207 insertions(+)
create mode 100644 llvm/test/CodeGen/AMDGPU/live-interval-bug-in-rename-independent-subregs.mir
diff --git a/llvm/test/CodeGen/AMDGPU/live-interval-bug-in-rename-independent-subregs.mir b/llvm/test/CodeGen/AMDGPU/live-interval-bug-in-rename-independent-subregs.mir
new file mode 100644
index 00000000000000..f990217aeae074
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/live-interval-bug-in-rename-independent-subregs.mir
@@ -0,0 +1,207 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+# RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1010 -start-before=rename-independent-subregs -mattr=+wavefrontsize64 -stop-before=amdgpu-mark-last-scratch-load %s -o - | FileCheck -check-prefix=REG_ALLOC %s
+# RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1010 -start-before=rename-independent-subregs -mattr=+wavefrontsize64 -stop-after=machine-cp %s -o - | FileCheck -check-prefix=DEAD_INST_DEL %s
+
+---
+name: _amdgpu_cs_main
+tracksRegLiveness: true
+body: |
+ ; REG_ALLOC-LABEL: name: _amdgpu_cs_main
+ ; REG_ALLOC: bb.0:
+ ; REG_ALLOC-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; REG_ALLOC-NEXT: liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr10, $sgpr2_sgpr3, $sgpr4_sgpr5, $sgpr8_sgpr9_sgpr10_sgpr11
+ ; REG_ALLOC-NEXT: {{ $}}
+ ; REG_ALLOC-NEXT: renamable $vgpr5_vgpr6_vgpr7_vgpr8 = BUFFER_LOAD_DWORDX4_OFFEN renamable $vgpr3, renamable $sgpr8_sgpr9_sgpr10_sgpr11, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
+ ; REG_ALLOC-NEXT: renamable $vgpr15_vgpr16_vgpr17_vgpr18 = BUFFER_LOAD_DWORDX4_OFFEN renamable $vgpr2, renamable $sgpr8_sgpr9_sgpr10_sgpr11, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
+ ; REG_ALLOC-NEXT: renamable $vgpr11_vgpr12_vgpr13_vgpr14 = BUFFER_LOAD_DWORDX4_OFFEN renamable $vgpr0, renamable $sgpr8_sgpr9_sgpr10_sgpr11, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
+ ; REG_ALLOC-NEXT: KILL killed renamable $vgpr2
+ ; REG_ALLOC-NEXT: KILL killed renamable $vgpr0
+ ; REG_ALLOC-NEXT: KILL killed renamable $vgpr3
+ ; REG_ALLOC-NEXT: renamable $sgpr12 = V_READFIRSTLANE_B32 killed $vgpr5, implicit $exec
+ ; REG_ALLOC-NEXT: renamable $vgpr4_vgpr5_vgpr6_vgpr7 = BUFFER_LOAD_DWORDX4_OFFEN killed renamable $vgpr4, killed renamable $sgpr8_sgpr9_sgpr10_sgpr11, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
+ ; REG_ALLOC-NEXT: renamable $sgpr13 = V_READFIRSTLANE_B32 killed $vgpr15, implicit $exec
+ ; REG_ALLOC-NEXT: renamable $sgpr6_sgpr7 = V_CMP_NE_U32_e64 killed $vgpr1, 0, implicit $exec
+ ; REG_ALLOC-NEXT: S_CMP_EQ_U64 killed renamable $sgpr12_sgpr13, killed renamable $sgpr2_sgpr3, implicit-def $scc
+ ; REG_ALLOC-NEXT: renamable $sgpr1 = S_CSELECT_B32 1, 0, implicit $scc
+ ; REG_ALLOC-NEXT: renamable $vgpr0 = IMPLICIT_DEF
+ ; REG_ALLOC-NEXT: $exec = S_MOV_B64_term renamable $sgpr6_sgpr7
+ ; REG_ALLOC-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec
+ ; REG_ALLOC-NEXT: S_BRANCH %bb.2
+ ; REG_ALLOC-NEXT: {{ $}}
+ ; REG_ALLOC-NEXT: bb.1:
+ ; REG_ALLOC-NEXT: successors: %bb.3(0x40000000), %bb.5(0x40000000)
+ ; REG_ALLOC-NEXT: liveins: $sgpr0, $vgpr0, $vgpr10, $sgpr4_sgpr5, $sgpr6_sgpr7, $vgpr11_vgpr12_vgpr13_vgpr14:0x0000000000000003, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7:0x0000000000000300
+ ; REG_ALLOC-NEXT: {{ $}}
+ ; REG_ALLOC-NEXT: renamable $sgpr2_sgpr3 = S_OR_SAVEEXEC_B64 killed renamable $sgpr6_sgpr7, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; REG_ALLOC-NEXT: $exec = S_XOR_B64_term $exec, renamable $sgpr2_sgpr3, implicit-def $scc
+ ; REG_ALLOC-NEXT: S_CBRANCH_EXECZ %bb.5, implicit $exec
+ ; REG_ALLOC-NEXT: S_BRANCH %bb.3
+ ; REG_ALLOC-NEXT: {{ $}}
+ ; REG_ALLOC-NEXT: bb.2:
+ ; REG_ALLOC-NEXT: successors: %bb.1(0x80000000)
+ ; REG_ALLOC-NEXT: liveins: $sgpr0, $sgpr1, $vgpr10, $sgpr4_sgpr5, $sgpr6_sgpr7
+ ; REG_ALLOC-NEXT: {{ $}}
+ ; REG_ALLOC-NEXT: renamable $sgpr1 = S_OR_B32 killed renamable $sgpr1, 2, implicit-def dead $scc
+ ; REG_ALLOC-NEXT: renamable $vgpr0 = COPY killed renamable $sgpr1
+ ; REG_ALLOC-NEXT: renamable $vgpr11_vgpr12 = IMPLICIT_DEF
+ ; REG_ALLOC-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF
+ ; REG_ALLOC-NEXT: S_BRANCH %bb.1
+ ; REG_ALLOC-NEXT: {{ $}}
+ ; REG_ALLOC-NEXT: bb.3:
+ ; REG_ALLOC-NEXT: successors: %bb.5(0x80000000)
+ ; REG_ALLOC-NEXT: liveins: $sgpr0, $vgpr10, $sgpr2_sgpr3, $sgpr4_sgpr5, $vgpr11_vgpr12_vgpr13_vgpr14:0x0000000000000003, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7:0x0000000000000300
+ ; REG_ALLOC-NEXT: {{ $}}
+ ; REG_ALLOC-NEXT: renamable $sgpr1 = V_READFIRSTLANE_B32 killed $vgpr11, implicit $exec
+ ; REG_ALLOC-NEXT: renamable $sgpr6 = V_READFIRSTLANE_B32 killed $vgpr4, implicit $exec
+ ; REG_ALLOC-NEXT: S_CMP_EQ_U32 killed renamable $sgpr6, killed renamable $sgpr1, implicit-def $scc
+ ; REG_ALLOC-NEXT: renamable $sgpr1 = S_CSELECT_B32 1, 0, implicit $scc
+ ; REG_ALLOC-NEXT: renamable $vgpr0 = COPY killed renamable $sgpr1
+ ; REG_ALLOC-NEXT: S_BRANCH %bb.5
+ ; REG_ALLOC-NEXT: {{ $}}
+ ; REG_ALLOC-NEXT: bb.4:
+ ; REG_ALLOC-NEXT: liveins: $sgpr0, $vgpr0, $vgpr10, $sgpr4_sgpr5
+ ; REG_ALLOC-NEXT: {{ $}}
+ ; REG_ALLOC-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (invariant load (<4 x s32>), addrspace 4)
+ ; REG_ALLOC-NEXT: renamable $vgpr1 = V_ADD_U32_e64 killed $sgpr0, killed $vgpr10, 0, implicit $exec
+ ; REG_ALLOC-NEXT: BUFFER_STORE_DWORD_OFFEN_exact killed renamable $vgpr0, killed renamable $vgpr1, killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
+ ; REG_ALLOC-NEXT: S_ENDPGM 0
+ ; REG_ALLOC-NEXT: {{ $}}
+ ; REG_ALLOC-NEXT: bb.5:
+ ; REG_ALLOC-NEXT: successors: %bb.4(0x80000000)
+ ; REG_ALLOC-NEXT: liveins: $sgpr0, $vgpr0, $vgpr10, $sgpr2_sgpr3, $sgpr4_sgpr5
+ ; REG_ALLOC-NEXT: {{ $}}
+ ; REG_ALLOC-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr2_sgpr3, implicit-def $scc
+ ; REG_ALLOC-NEXT: S_BRANCH %bb.4
+ ;
+ ; DEAD_INST_DEL-LABEL: name: _amdgpu_cs_main
+ ; DEAD_INST_DEL: bb.0:
+ ; DEAD_INST_DEL-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; DEAD_INST_DEL-NEXT: liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr10, $sgpr2_sgpr3, $sgpr4_sgpr5, $sgpr8_sgpr9_sgpr10_sgpr11
+ ; DEAD_INST_DEL-NEXT: {{ $}}
+ ; DEAD_INST_DEL-NEXT: renamable $vgpr5_vgpr6_vgpr7_vgpr8 = BUFFER_LOAD_DWORDX4_OFFEN renamable $vgpr3, renamable $sgpr8_sgpr9_sgpr10_sgpr11, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
+ ; DEAD_INST_DEL-NEXT: renamable $vgpr15_vgpr16_vgpr17_vgpr18 = BUFFER_LOAD_DWORDX4_OFFEN renamable $vgpr2, renamable $sgpr8_sgpr9_sgpr10_sgpr11, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
+ ; DEAD_INST_DEL-NEXT: renamable $vgpr11_vgpr12_vgpr13_vgpr14 = BUFFER_LOAD_DWORDX4_OFFEN renamable $vgpr0, renamable $sgpr8_sgpr9_sgpr10_sgpr11, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
+ ; DEAD_INST_DEL-NEXT: KILL killed renamable $vgpr2
+ ; DEAD_INST_DEL-NEXT: KILL killed renamable $vgpr0
+ ; DEAD_INST_DEL-NEXT: KILL killed renamable $vgpr3
+ ; DEAD_INST_DEL-NEXT: renamable $sgpr12 = V_READFIRSTLANE_B32 killed $vgpr5, implicit $exec
+ ; DEAD_INST_DEL-NEXT: renamable $vgpr4_vgpr5_vgpr6_vgpr7 = BUFFER_LOAD_DWORDX4_OFFEN killed renamable $vgpr4, killed renamable $sgpr8_sgpr9_sgpr10_sgpr11, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
+ ; DEAD_INST_DEL-NEXT: renamable $sgpr13 = V_READFIRSTLANE_B32 killed $vgpr15, implicit $exec
+ ; DEAD_INST_DEL-NEXT: renamable $sgpr6_sgpr7 = V_CMP_NE_U32_e64 killed $vgpr1, 0, implicit $exec
+ ; DEAD_INST_DEL-NEXT: S_CMP_EQ_U64 killed renamable $sgpr12_sgpr13, killed renamable $sgpr2_sgpr3, implicit-def $scc
+ ; DEAD_INST_DEL-NEXT: renamable $sgpr1 = S_CSELECT_B32 1, 0, implicit $scc
+ ; DEAD_INST_DEL-NEXT: renamable $vgpr0 = IMPLICIT_DEF
+ ; DEAD_INST_DEL-NEXT: $exec = S_MOV_B64_term renamable $sgpr6_sgpr7
+ ; DEAD_INST_DEL-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec
+ ; DEAD_INST_DEL-NEXT: S_BRANCH %bb.2
+ ; DEAD_INST_DEL-NEXT: {{ $}}
+ ; DEAD_INST_DEL-NEXT: bb.1:
+ ; DEAD_INST_DEL-NEXT: successors: %bb.3(0x40000000), %bb.5(0x40000000)
+ ; DEAD_INST_DEL-NEXT: liveins: $sgpr0, $vgpr0, $vgpr10, $sgpr4_sgpr5, $sgpr6_sgpr7, $vgpr11_vgpr12_vgpr13_vgpr14:0x0000000000000003, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7:0x0000000000000300
+ ; DEAD_INST_DEL-NEXT: {{ $}}
+ ; DEAD_INST_DEL-NEXT: renamable $sgpr2_sgpr3 = S_OR_SAVEEXEC_B64 killed renamable $sgpr6_sgpr7, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; DEAD_INST_DEL-NEXT: $exec = S_XOR_B64_term $exec, renamable $sgpr2_sgpr3, implicit-def $scc
+ ; DEAD_INST_DEL-NEXT: S_CBRANCH_EXECZ %bb.5, implicit $exec
+ ; DEAD_INST_DEL-NEXT: S_BRANCH %bb.3
+ ; DEAD_INST_DEL-NEXT: {{ $}}
+ ; DEAD_INST_DEL-NEXT: bb.2:
+ ; DEAD_INST_DEL-NEXT: successors: %bb.1(0x80000000)
+ ; DEAD_INST_DEL-NEXT: liveins: $sgpr0, $sgpr1, $vgpr10, $sgpr4_sgpr5, $sgpr6_sgpr7
+ ; DEAD_INST_DEL-NEXT: {{ $}}
+ ; DEAD_INST_DEL-NEXT: renamable $sgpr1 = S_OR_B32 killed renamable $sgpr1, 2, implicit-def dead $scc
+ ; DEAD_INST_DEL-NEXT: renamable $vgpr11_vgpr12 = IMPLICIT_DEF
+ ; DEAD_INST_DEL-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF
+ ; DEAD_INST_DEL-NEXT: S_BRANCH %bb.1
+ ; DEAD_INST_DEL-NEXT: {{ $}}
+ ; DEAD_INST_DEL-NEXT: bb.3:
+ ; DEAD_INST_DEL-NEXT: successors: %bb.5(0x80000000)
+ ; DEAD_INST_DEL-NEXT: liveins: $sgpr0, $vgpr10, $sgpr2_sgpr3, $sgpr4_sgpr5, $vgpr11_vgpr12_vgpr13_vgpr14:0x0000000000000003, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7:0x0000000000000300
+ ; DEAD_INST_DEL-NEXT: {{ $}}
+ ; DEAD_INST_DEL-NEXT: renamable $sgpr1 = V_READFIRSTLANE_B32 killed $vgpr11, implicit $exec
+ ; DEAD_INST_DEL-NEXT: renamable $sgpr6 = V_READFIRSTLANE_B32 killed $vgpr4, implicit $exec
+ ; DEAD_INST_DEL-NEXT: S_CMP_EQ_U32 killed renamable $sgpr6, killed renamable $sgpr1, implicit-def $scc
+ ; DEAD_INST_DEL-NEXT: renamable $sgpr1 = S_CSELECT_B32 1, 0, implicit $scc
+ ; DEAD_INST_DEL-NEXT: renamable $vgpr0 = COPY killed renamable $sgpr1
+ ; DEAD_INST_DEL-NEXT: S_BRANCH %bb.5
+ ; DEAD_INST_DEL-NEXT: {{ $}}
+ ; DEAD_INST_DEL-NEXT: bb.4:
+ ; DEAD_INST_DEL-NEXT: liveins: $sgpr0, $vgpr0, $vgpr10, $sgpr4_sgpr5
+ ; DEAD_INST_DEL-NEXT: {{ $}}
+ ; DEAD_INST_DEL-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (invariant load (<4 x s32>), addrspace 4)
+ ; DEAD_INST_DEL-NEXT: renamable $vgpr1 = V_ADD_U32_e64 killed $sgpr0, killed $vgpr10, 0, implicit $exec
+ ; DEAD_INST_DEL-NEXT: BUFFER_STORE_DWORD_OFFEN_exact killed renamable $vgpr0, killed renamable $vgpr1, killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
+ ; DEAD_INST_DEL-NEXT: S_ENDPGM 0
+ ; DEAD_INST_DEL-NEXT: {{ $}}
+ ; DEAD_INST_DEL-NEXT: bb.5:
+ ; DEAD_INST_DEL-NEXT: successors: %bb.4(0x80000000)
+ ; DEAD_INST_DEL-NEXT: liveins: $sgpr0, $vgpr0, $vgpr10, $sgpr2_sgpr3, $sgpr4_sgpr5
+ ; DEAD_INST_DEL-NEXT: {{ $}}
+ ; DEAD_INST_DEL-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr2_sgpr3, implicit-def $scc
+ ; DEAD_INST_DEL-NEXT: S_BRANCH %bb.4
+ bb.0:
+ successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ liveins: $sgpr0, $sgpr2_sgpr3, $sgpr4_sgpr5, $sgpr8_sgpr9_sgpr10_sgpr11, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr10
+
+ %0:sreg_32 = COPY $sgpr0
+ %1:vgpr_32 = COPY $vgpr0
+ %2:vgpr_32 = COPY $vgpr1
+ %3:vgpr_32 = COPY $vgpr2
+ %4:vgpr_32 = COPY $vgpr3
+ %5:vgpr_32 = COPY $vgpr4
+ %6:vgpr_32 = COPY $vgpr10
+ %7:sreg_64 = COPY $sgpr2_sgpr3
+ %8:sreg_64 = COPY $sgpr4_sgpr5
+ %9:sgpr_128 = COPY $sgpr8_sgpr9_sgpr10_sgpr11
+ undef %10.sub0_sub1_sub2_sub3:vreg_256 = BUFFER_LOAD_DWORDX4_OFFEN %1, %9, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
+ %10.sub4_sub5_sub6_sub7:vreg_256 = BUFFER_LOAD_DWORDX4_OFFEN %5, %9, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
+ %11:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN %4, %9, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
+ %12:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN %3, %9, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
+ undef %13.sub0:sgpr_256 = V_READFIRSTLANE_B32 %11.sub0, implicit $exec
+ %13.sub1:sgpr_256 = V_READFIRSTLANE_B32 %12.sub0, implicit $exec
+ S_CMP_EQ_U64 %13.sub0_sub1, %7, implicit-def $scc
+ %14:sreg_32 = S_CSELECT_B32 1, 0, implicit killed $scc
+ %15:sreg_64_xexec = V_CMP_NE_U32_e64 %2, 0, implicit $exec
+ %16:vgpr_32 = IMPLICIT_DEF
+ $exec = S_MOV_B64_term %15
+ S_CBRANCH_EXECZ %bb.1, implicit $exec
+ S_BRANCH %bb.2
+
+ bb.1:
+ successors: %bb.3(0x40000000), %bb.5(0x40000000)
+
+ %17:sreg_64 = S_OR_SAVEEXEC_B64 %15, implicit-def $exec, implicit-def $scc, implicit $exec
+ %18:sreg_64_xexec = S_AND_B64 $exec, %17, implicit-def $scc
+ $exec = S_XOR_B64_term $exec, %18, implicit-def $scc
+ S_CBRANCH_EXECZ %bb.5, implicit $exec
+ S_BRANCH %bb.3
+
+ bb.2:
+ successors: %bb.1(0x80000000)
+
+ %19:sreg_32 = S_OR_B32 %14, 2, implicit-def dead $scc
+ %16:vgpr_32 = COPY %19
+ undef %10.sub0_sub1:vreg_256 = IMPLICIT_DEF
+ S_BRANCH %bb.1
+
+ bb.3:
+ successors: %bb.5(0x80000000)
+
+ %20:sreg_32 = V_READFIRSTLANE_B32 %10.sub0, implicit $exec
+ %21:sreg_32 = V_READFIRSTLANE_B32 %10.sub4, implicit $exec
+ S_CMP_EQ_U32 %21, %20, implicit-def $scc
+ %22:sreg_32 = S_CSELECT_B32 1, 0, implicit killed $scc
+ %16:vgpr_32 = COPY %22
+ S_BRANCH %bb.5
+
+ bb.4:
+ %23:vgpr_32 = V_ADD_U32_e64 %0, %6, 0, implicit $exec
+ %24:sgpr_128 = S_LOAD_DWORDX4_IMM %8, 0, 0 :: (invariant load (<4 x s32>), addrspace 4)
+ BUFFER_STORE_DWORD_OFFEN_exact %16, %23, %24, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
+ S_ENDPGM 0
+
+ bb.5:
+ successors: %bb.4(0x80000000)
+
+ $exec = S_OR_B64 $exec, %18, implicit-def $scc
+ S_BRANCH %bb.4
+...
>From b2523beb6b12bc06df66c1ee1d4db911c10c46e4 Mon Sep 17 00:00:00 2001
From: Petar Avramovic <Petar.Avramovic at amd.com>
Date: Tue, 23 Apr 2024 10:36:37 +0200
Subject: [PATCH 2/2] RenameIndependentSubregs: Add missing sub-range for new
IMPLICIT_DEFs
When a new IMPLICIT_DEF is added, the existing sub-ranges are correctly
updated, but no sub-range is created for the lanes that are defined by
the IMPLICIT_DEF itself and are not covered by any existing sub-range.
Because this sub-range is missing from the live interval, the register
allocator does not know that the IMPLICIT_DEF writes those virtual
sub-registers and can end up assigning overlapping physical registers
to them.
As a result, instructions that define the sub-registers overwritten by
the IMPLICIT_DEF are treated as dead and are deleted.
---
llvm/lib/CodeGen/RenameIndependentSubregs.cpp | 7 +
.../GlobalISel/llvm.amdgcn.intersect_ray.ll | 291 ++++++++++--------
...rval-bug-in-rename-independent-subregs.mir | 31 +-
3 files changed, 179 insertions(+), 150 deletions(-)
diff --git a/llvm/lib/CodeGen/RenameIndependentSubregs.cpp b/llvm/lib/CodeGen/RenameIndependentSubregs.cpp
index bc3ef1c0329a98..e888f290df510d 100644
--- a/llvm/lib/CodeGen/RenameIndependentSubregs.cpp
+++ b/llvm/lib/CodeGen/RenameIndependentSubregs.cpp
@@ -334,10 +334,17 @@ void RenameIndependentSubregs::computeMainRangesFixFlags(
DebugLoc(), MCDesc, Reg);
SlotIndex DefIdx = LIS->InsertMachineInstrInMaps(*ImpDef);
SlotIndex RegDefIdx = DefIdx.getRegSlot();
+ LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(Reg);
for (LiveInterval::SubRange &SR : LI.subranges()) {
+ Mask = Mask & ~SR.LaneMask;
VNInfo *SRVNI = SR.getNextValue(RegDefIdx, Allocator);
SR.addSegment(LiveRange::Segment(RegDefIdx, PredEnd, SRVNI));
}
+
+ if (!Mask.none()) {
+ LiveInterval::SubRange *SR = LI.createSubRange(Allocator, Mask);
+ SR->createDeadDef(RegDefIdx, Allocator);
+ }
}
}
}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll
index 47e476de74cf41..6e96a4ddbc0b3b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll
@@ -144,40 +144,42 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_a16(i64 %node_ptr, float
define amdgpu_ps <4 x float> @image_bvh_intersect_ray_vgpr_descr(i32 %node_ptr, float %ray_extent, <3 x float> %ray_origin, <3 x float> %ray_dir, <3 x float> %ray_inv_dir, <4 x i32> %tdescr) {
; GFX1030-LABEL: image_bvh_intersect_ray_vgpr_descr:
; GFX1030: ; %bb.0:
-; GFX1030-NEXT: v_mov_b32_e32 v15, v0
-; GFX1030-NEXT: v_mov_b32_e32 v16, v1
-; GFX1030-NEXT: v_mov_b32_e32 v17, v2
-; GFX1030-NEXT: v_mov_b32_e32 v18, v3
-; GFX1030-NEXT: v_mov_b32_e32 v19, v4
-; GFX1030-NEXT: v_mov_b32_e32 v20, v5
-; GFX1030-NEXT: v_mov_b32_e32 v21, v6
-; GFX1030-NEXT: v_mov_b32_e32 v22, v7
-; GFX1030-NEXT: v_mov_b32_e32 v23, v8
-; GFX1030-NEXT: v_mov_b32_e32 v24, v9
-; GFX1030-NEXT: v_mov_b32_e32 v25, v10
+; GFX1030-NEXT: v_mov_b32_e32 v21, v0
+; GFX1030-NEXT: v_mov_b32_e32 v22, v1
+; GFX1030-NEXT: v_mov_b32_e32 v23, v2
+; GFX1030-NEXT: v_mov_b32_e32 v24, v3
+; GFX1030-NEXT: v_mov_b32_e32 v25, v4
+; GFX1030-NEXT: v_mov_b32_e32 v26, v5
+; GFX1030-NEXT: v_mov_b32_e32 v27, v6
+; GFX1030-NEXT: v_mov_b32_e32 v28, v7
+; GFX1030-NEXT: v_mov_b32_e32 v29, v8
+; GFX1030-NEXT: v_mov_b32_e32 v30, v9
+; GFX1030-NEXT: v_mov_b32_e32 v31, v10
+; GFX1030-NEXT: v_mov_b32_e32 v19, v11
+; GFX1030-NEXT: v_mov_b32_e32 v20, v12
; GFX1030-NEXT: s_mov_b32 s1, exec_lo
; GFX1030-NEXT: .LBB6_1: ; =>This Inner Loop Header: Depth=1
-; GFX1030-NEXT: v_readfirstlane_b32 s4, v11
-; GFX1030-NEXT: v_readfirstlane_b32 s5, v12
+; GFX1030-NEXT: v_readfirstlane_b32 s4, v19
+; GFX1030-NEXT: v_readfirstlane_b32 s5, v20
; GFX1030-NEXT: v_readfirstlane_b32 s6, v13
; GFX1030-NEXT: v_readfirstlane_b32 s7, v14
-; GFX1030-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[11:12]
+; GFX1030-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[19:20]
; GFX1030-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[13:14]
; GFX1030-NEXT: s_and_b32 s0, vcc_lo, s0
; GFX1030-NEXT: s_and_saveexec_b32 s0, s0
-; GFX1030-NEXT: image_bvh_intersect_ray v[0:3], v[15:25], s[4:7]
-; GFX1030-NEXT: ; implicit-def: $vgpr11
-; GFX1030-NEXT: ; implicit-def: $vgpr15
-; GFX1030-NEXT: ; implicit-def: $vgpr16
-; GFX1030-NEXT: ; implicit-def: $vgpr17
-; GFX1030-NEXT: ; implicit-def: $vgpr18
+; GFX1030-NEXT: image_bvh_intersect_ray v[0:3], v[21:31], s[4:7]
; GFX1030-NEXT: ; implicit-def: $vgpr19
-; GFX1030-NEXT: ; implicit-def: $vgpr20
; GFX1030-NEXT: ; implicit-def: $vgpr21
; GFX1030-NEXT: ; implicit-def: $vgpr22
; GFX1030-NEXT: ; implicit-def: $vgpr23
; GFX1030-NEXT: ; implicit-def: $vgpr24
; GFX1030-NEXT: ; implicit-def: $vgpr25
+; GFX1030-NEXT: ; implicit-def: $vgpr26
+; GFX1030-NEXT: ; implicit-def: $vgpr27
+; GFX1030-NEXT: ; implicit-def: $vgpr28
+; GFX1030-NEXT: ; implicit-def: $vgpr29
+; GFX1030-NEXT: ; implicit-def: $vgpr30
+; GFX1030-NEXT: ; implicit-def: $vgpr31
; GFX1030-NEXT: ; implicit-def: $vgpr11_vgpr12_vgpr13_vgpr14
; GFX1030-NEXT: s_xor_b32 exec_lo, exec_lo, s0
; GFX1030-NEXT: s_cbranch_execnz .LBB6_1
@@ -188,18 +190,20 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_vgpr_descr(i32 %node_ptr,
;
; GFX1013-LABEL: image_bvh_intersect_ray_vgpr_descr:
; GFX1013: ; %bb.0:
+; GFX1013-NEXT: v_mov_b32_e32 v19, v11
+; GFX1013-NEXT: v_mov_b32_e32 v20, v12
; GFX1013-NEXT: s_mov_b32 s1, exec_lo
; GFX1013-NEXT: .LBB6_1: ; =>This Inner Loop Header: Depth=1
-; GFX1013-NEXT: v_readfirstlane_b32 s4, v11
-; GFX1013-NEXT: v_readfirstlane_b32 s5, v12
+; GFX1013-NEXT: v_readfirstlane_b32 s4, v19
+; GFX1013-NEXT: v_readfirstlane_b32 s5, v20
; GFX1013-NEXT: v_readfirstlane_b32 s6, v13
; GFX1013-NEXT: v_readfirstlane_b32 s7, v14
-; GFX1013-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[11:12]
+; GFX1013-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[19:20]
; GFX1013-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[13:14]
; GFX1013-NEXT: s_and_b32 s0, vcc_lo, s0
; GFX1013-NEXT: s_and_saveexec_b32 s0, s0
; GFX1013-NEXT: image_bvh_intersect_ray v[15:18], v[0:10], s[4:7]
-; GFX1013-NEXT: ; implicit-def: $vgpr11
+; GFX1013-NEXT: ; implicit-def: $vgpr19
; GFX1013-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10
; GFX1013-NEXT: ; implicit-def: $vgpr11_vgpr12_vgpr13_vgpr14
; GFX1013-NEXT: s_waitcnt_depctr 0xffe3
@@ -216,25 +220,27 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_vgpr_descr(i32 %node_ptr,
;
; GFX11-LABEL: image_bvh_intersect_ray_vgpr_descr:
; GFX11: ; %bb.0:
-; GFX11-NEXT: v_dual_mov_b32 v18, v0 :: v_dual_mov_b32 v19, v1
+; GFX11-NEXT: v_dual_mov_b32 v20, v0 :: v_dual_mov_b32 v21, v1
; GFX11-NEXT: v_dual_mov_b32 v15, v2 :: v_dual_mov_b32 v16, v3
-; GFX11-NEXT: v_mov_b32_e32 v17, v4
+; GFX11-NEXT: v_dual_mov_b32 v17, v4 :: v_dual_mov_b32 v18, v11
+; GFX11-NEXT: v_mov_b32_e32 v19, v12
; GFX11-NEXT: s_mov_b32 s1, exec_lo
; GFX11-NEXT: .LBB6_1: ; =>This Inner Loop Header: Depth=1
-; GFX11-NEXT: v_readfirstlane_b32 s4, v11
-; GFX11-NEXT: v_readfirstlane_b32 s5, v12
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_readfirstlane_b32 s4, v18
+; GFX11-NEXT: v_readfirstlane_b32 s5, v19
; GFX11-NEXT: v_readfirstlane_b32 s6, v13
; GFX11-NEXT: v_readfirstlane_b32 s7, v14
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[11:12]
+; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[18:19]
; GFX11-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[13:14]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: s_and_b32 s0, vcc_lo, s0
; GFX11-NEXT: s_and_saveexec_b32 s0, s0
-; GFX11-NEXT: image_bvh_intersect_ray v[0:3], [v18, v19, v[15:17], v[5:7], v[8:10]], s[4:7]
-; GFX11-NEXT: ; implicit-def: $vgpr11
+; GFX11-NEXT: image_bvh_intersect_ray v[0:3], [v20, v21, v[15:17], v[5:7], v[8:10]], s[4:7]
; GFX11-NEXT: ; implicit-def: $vgpr18
-; GFX11-NEXT: ; implicit-def: $vgpr19
+; GFX11-NEXT: ; implicit-def: $vgpr20
+; GFX11-NEXT: ; implicit-def: $vgpr21
; GFX11-NEXT: ; implicit-def: $vgpr15_vgpr16_vgpr17
; GFX11-NEXT: ; implicit-def: $vgpr5_vgpr6_vgpr7
; GFX11-NEXT: ; implicit-def: $vgpr8_vgpr9_vgpr10
@@ -253,39 +259,41 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_vgpr_descr(i32 %node_ptr,
define amdgpu_ps <4 x float> @image_bvh_intersect_ray_a16_vgpr_descr(i32 %node_ptr, float %ray_extent, <3 x float> %ray_origin, <3 x half> %ray_dir, <3 x half> %ray_inv_dir, <4 x i32> %tdescr) {
; GFX1030-LABEL: image_bvh_intersect_ray_a16_vgpr_descr:
; GFX1030: ; %bb.0:
-; GFX1030-NEXT: v_mov_b32_e32 v13, v0
-; GFX1030-NEXT: v_mov_b32_e32 v14, v1
+; GFX1030-NEXT: v_mov_b32_e32 v18, v0
+; GFX1030-NEXT: v_mov_b32_e32 v19, v1
; GFX1030-NEXT: v_lshrrev_b32_e32 v0, 16, v5
; GFX1030-NEXT: v_and_b32_e32 v1, 0xffff, v7
-; GFX1030-NEXT: v_mov_b32_e32 v15, v2
+; GFX1030-NEXT: v_mov_b32_e32 v20, v2
; GFX1030-NEXT: v_and_b32_e32 v2, 0xffff, v8
-; GFX1030-NEXT: v_mov_b32_e32 v16, v3
+; GFX1030-NEXT: v_mov_b32_e32 v21, v3
; GFX1030-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX1030-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX1030-NEXT: v_mov_b32_e32 v17, v4
-; GFX1030-NEXT: v_alignbit_b32 v20, v2, v7, 16
+; GFX1030-NEXT: v_mov_b32_e32 v22, v4
+; GFX1030-NEXT: v_mov_b32_e32 v16, v9
+; GFX1030-NEXT: v_mov_b32_e32 v17, v10
+; GFX1030-NEXT: v_and_or_b32 v23, 0xffff, v5, v0
+; GFX1030-NEXT: v_and_or_b32 v24, 0xffff, v6, v1
+; GFX1030-NEXT: v_alignbit_b32 v25, v2, v7, 16
; GFX1030-NEXT: s_mov_b32 s1, exec_lo
-; GFX1030-NEXT: v_and_or_b32 v18, 0xffff, v5, v0
-; GFX1030-NEXT: v_and_or_b32 v19, 0xffff, v6, v1
; GFX1030-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1
-; GFX1030-NEXT: v_readfirstlane_b32 s4, v9
-; GFX1030-NEXT: v_readfirstlane_b32 s5, v10
+; GFX1030-NEXT: v_readfirstlane_b32 s4, v16
+; GFX1030-NEXT: v_readfirstlane_b32 s5, v17
; GFX1030-NEXT: v_readfirstlane_b32 s6, v11
; GFX1030-NEXT: v_readfirstlane_b32 s7, v12
-; GFX1030-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[9:10]
+; GFX1030-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[16:17]
; GFX1030-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[11:12]
; GFX1030-NEXT: s_and_b32 s0, vcc_lo, s0
; GFX1030-NEXT: s_and_saveexec_b32 s0, s0
-; GFX1030-NEXT: image_bvh_intersect_ray v[0:3], v[13:20], s[4:7] a16
-; GFX1030-NEXT: ; implicit-def: $vgpr9
-; GFX1030-NEXT: ; implicit-def: $vgpr13
-; GFX1030-NEXT: ; implicit-def: $vgpr14
-; GFX1030-NEXT: ; implicit-def: $vgpr15
+; GFX1030-NEXT: image_bvh_intersect_ray v[0:3], v[18:25], s[4:7] a16
; GFX1030-NEXT: ; implicit-def: $vgpr16
-; GFX1030-NEXT: ; implicit-def: $vgpr17
; GFX1030-NEXT: ; implicit-def: $vgpr18
; GFX1030-NEXT: ; implicit-def: $vgpr19
; GFX1030-NEXT: ; implicit-def: $vgpr20
+; GFX1030-NEXT: ; implicit-def: $vgpr21
+; GFX1030-NEXT: ; implicit-def: $vgpr22
+; GFX1030-NEXT: ; implicit-def: $vgpr23
+; GFX1030-NEXT: ; implicit-def: $vgpr24
+; GFX1030-NEXT: ; implicit-def: $vgpr25
; GFX1030-NEXT: ; implicit-def: $vgpr9_vgpr10_vgpr11_vgpr12
; GFX1030-NEXT: s_xor_b32 exec_lo, exec_lo, s0
; GFX1030-NEXT: s_cbranch_execnz .LBB7_1
@@ -296,26 +304,28 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_a16_vgpr_descr(i32 %node_p
;
; GFX1013-LABEL: image_bvh_intersect_ray_a16_vgpr_descr:
; GFX1013: ; %bb.0:
-; GFX1013-NEXT: v_lshrrev_b32_e32 v13, 16, v5
-; GFX1013-NEXT: v_and_b32_e32 v14, 0xffff, v7
+; GFX1013-NEXT: v_mov_b32_e32 v17, v9
+; GFX1013-NEXT: v_mov_b32_e32 v18, v10
+; GFX1013-NEXT: v_lshrrev_b32_e32 v9, 16, v5
+; GFX1013-NEXT: v_and_b32_e32 v10, 0xffff, v7
; GFX1013-NEXT: v_and_b32_e32 v8, 0xffff, v8
; GFX1013-NEXT: s_mov_b32 s1, exec_lo
-; GFX1013-NEXT: v_lshlrev_b32_e32 v13, 16, v13
-; GFX1013-NEXT: v_lshlrev_b32_e32 v14, 16, v14
+; GFX1013-NEXT: v_lshlrev_b32_e32 v9, 16, v9
+; GFX1013-NEXT: v_lshlrev_b32_e32 v10, 16, v10
; GFX1013-NEXT: v_alignbit_b32 v7, v8, v7, 16
-; GFX1013-NEXT: v_and_or_b32 v5, 0xffff, v5, v13
-; GFX1013-NEXT: v_and_or_b32 v6, 0xffff, v6, v14
+; GFX1013-NEXT: v_and_or_b32 v5, 0xffff, v5, v9
+; GFX1013-NEXT: v_and_or_b32 v6, 0xffff, v6, v10
; GFX1013-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1
-; GFX1013-NEXT: v_readfirstlane_b32 s4, v9
-; GFX1013-NEXT: v_readfirstlane_b32 s5, v10
+; GFX1013-NEXT: v_readfirstlane_b32 s4, v17
+; GFX1013-NEXT: v_readfirstlane_b32 s5, v18
; GFX1013-NEXT: v_readfirstlane_b32 s6, v11
; GFX1013-NEXT: v_readfirstlane_b32 s7, v12
-; GFX1013-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[9:10]
+; GFX1013-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[17:18]
; GFX1013-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[11:12]
; GFX1013-NEXT: s_and_b32 s0, vcc_lo, s0
; GFX1013-NEXT: s_and_saveexec_b32 s0, s0
; GFX1013-NEXT: image_bvh_intersect_ray v[13:16], v[0:7], s[4:7] a16
-; GFX1013-NEXT: ; implicit-def: $vgpr9
+; GFX1013-NEXT: ; implicit-def: $vgpr17
; GFX1013-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
; GFX1013-NEXT: ; implicit-def: $vgpr9_vgpr10_vgpr11_vgpr12
; GFX1013-NEXT: s_waitcnt_depctr 0xffe3
@@ -333,27 +343,28 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_a16_vgpr_descr(i32 %node_p
; GFX11-LABEL: image_bvh_intersect_ray_a16_vgpr_descr:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_dual_mov_b32 v16, v0 :: v_dual_mov_b32 v17, v1
-; GFX11-NEXT: v_dual_mov_b32 v15, v4 :: v_dual_and_b32 v0, 0xffff, v7
+; GFX11-NEXT: v_dual_mov_b32 v19, v10 :: v_dual_and_b32 v0, 0xffff, v7
; GFX11-NEXT: v_and_b32_e32 v1, 0xffff, v8
; GFX11-NEXT: v_dual_mov_b32 v13, v2 :: v_dual_mov_b32 v14, v3
-; GFX11-NEXT: s_mov_b32 s1, exec_lo
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX11-NEXT: v_dual_mov_b32 v15, v4 :: v_dual_mov_b32 v18, v9
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX11-NEXT: v_lshl_or_b32 v4, v5, 16, v0
; GFX11-NEXT: v_perm_b32 v5, v5, v7, 0x7060302
; GFX11-NEXT: v_lshl_or_b32 v6, v6, 16, v1
+; GFX11-NEXT: s_mov_b32 s1, exec_lo
; GFX11-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1
-; GFX11-NEXT: v_readfirstlane_b32 s4, v9
-; GFX11-NEXT: v_readfirstlane_b32 s5, v10
+; GFX11-NEXT: v_readfirstlane_b32 s4, v18
+; GFX11-NEXT: v_readfirstlane_b32 s5, v19
; GFX11-NEXT: v_readfirstlane_b32 s6, v11
; GFX11-NEXT: v_readfirstlane_b32 s7, v12
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[9:10]
+; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[18:19]
; GFX11-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[11:12]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: s_and_b32 s0, vcc_lo, s0
; GFX11-NEXT: s_and_saveexec_b32 s0, s0
; GFX11-NEXT: image_bvh_intersect_ray v[0:3], [v16, v17, v[13:15], v[4:6]], s[4:7] a16
-; GFX11-NEXT: ; implicit-def: $vgpr9
+; GFX11-NEXT: ; implicit-def: $vgpr18
; GFX11-NEXT: ; implicit-def: $vgpr16
; GFX11-NEXT: ; implicit-def: $vgpr17
; GFX11-NEXT: ; implicit-def: $vgpr13_vgpr14_vgpr15
@@ -373,42 +384,44 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_a16_vgpr_descr(i32 %node_p
define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_vgpr_descr(i64 %node_ptr, float %ray_extent, <3 x float> %ray_origin, <3 x float> %ray_dir, <3 x float> %ray_inv_dir, <4 x i32> %tdescr) {
; GFX1030-LABEL: image_bvh64_intersect_ray_vgpr_descr:
; GFX1030: ; %bb.0:
-; GFX1030-NEXT: v_mov_b32_e32 v16, v0
-; GFX1030-NEXT: v_mov_b32_e32 v17, v1
-; GFX1030-NEXT: v_mov_b32_e32 v18, v2
-; GFX1030-NEXT: v_mov_b32_e32 v19, v3
-; GFX1030-NEXT: v_mov_b32_e32 v20, v4
-; GFX1030-NEXT: v_mov_b32_e32 v21, v5
-; GFX1030-NEXT: v_mov_b32_e32 v22, v6
-; GFX1030-NEXT: v_mov_b32_e32 v23, v7
-; GFX1030-NEXT: v_mov_b32_e32 v24, v8
-; GFX1030-NEXT: v_mov_b32_e32 v25, v9
-; GFX1030-NEXT: v_mov_b32_e32 v26, v10
-; GFX1030-NEXT: v_mov_b32_e32 v27, v11
+; GFX1030-NEXT: v_mov_b32_e32 v22, v0
+; GFX1030-NEXT: v_mov_b32_e32 v23, v1
+; GFX1030-NEXT: v_mov_b32_e32 v24, v2
+; GFX1030-NEXT: v_mov_b32_e32 v25, v3
+; GFX1030-NEXT: v_mov_b32_e32 v26, v4
+; GFX1030-NEXT: v_mov_b32_e32 v27, v5
+; GFX1030-NEXT: v_mov_b32_e32 v28, v6
+; GFX1030-NEXT: v_mov_b32_e32 v29, v7
+; GFX1030-NEXT: v_mov_b32_e32 v30, v8
+; GFX1030-NEXT: v_mov_b32_e32 v31, v9
+; GFX1030-NEXT: v_mov_b32_e32 v32, v10
+; GFX1030-NEXT: v_mov_b32_e32 v33, v11
+; GFX1030-NEXT: v_mov_b32_e32 v20, v12
+; GFX1030-NEXT: v_mov_b32_e32 v21, v13
; GFX1030-NEXT: s_mov_b32 s1, exec_lo
; GFX1030-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
-; GFX1030-NEXT: v_readfirstlane_b32 s4, v12
-; GFX1030-NEXT: v_readfirstlane_b32 s5, v13
+; GFX1030-NEXT: v_readfirstlane_b32 s4, v20
+; GFX1030-NEXT: v_readfirstlane_b32 s5, v21
; GFX1030-NEXT: v_readfirstlane_b32 s6, v14
; GFX1030-NEXT: v_readfirstlane_b32 s7, v15
-; GFX1030-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[12:13]
+; GFX1030-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[20:21]
; GFX1030-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[14:15]
; GFX1030-NEXT: s_and_b32 s0, vcc_lo, s0
; GFX1030-NEXT: s_and_saveexec_b32 s0, s0
-; GFX1030-NEXT: image_bvh64_intersect_ray v[0:3], v[16:27], s[4:7]
-; GFX1030-NEXT: ; implicit-def: $vgpr12
-; GFX1030-NEXT: ; implicit-def: $vgpr16
-; GFX1030-NEXT: ; implicit-def: $vgpr17
-; GFX1030-NEXT: ; implicit-def: $vgpr18
-; GFX1030-NEXT: ; implicit-def: $vgpr19
+; GFX1030-NEXT: image_bvh64_intersect_ray v[0:3], v[22:33], s[4:7]
; GFX1030-NEXT: ; implicit-def: $vgpr20
-; GFX1030-NEXT: ; implicit-def: $vgpr21
; GFX1030-NEXT: ; implicit-def: $vgpr22
; GFX1030-NEXT: ; implicit-def: $vgpr23
; GFX1030-NEXT: ; implicit-def: $vgpr24
; GFX1030-NEXT: ; implicit-def: $vgpr25
; GFX1030-NEXT: ; implicit-def: $vgpr26
; GFX1030-NEXT: ; implicit-def: $vgpr27
+; GFX1030-NEXT: ; implicit-def: $vgpr28
+; GFX1030-NEXT: ; implicit-def: $vgpr29
+; GFX1030-NEXT: ; implicit-def: $vgpr30
+; GFX1030-NEXT: ; implicit-def: $vgpr31
+; GFX1030-NEXT: ; implicit-def: $vgpr32
+; GFX1030-NEXT: ; implicit-def: $vgpr33
; GFX1030-NEXT: ; implicit-def: $vgpr12_vgpr13_vgpr14_vgpr15
; GFX1030-NEXT: s_xor_b32 exec_lo, exec_lo, s0
; GFX1030-NEXT: s_cbranch_execnz .LBB8_1
@@ -419,18 +432,20 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_vgpr_descr(i64 %node_ptr
;
; GFX1013-LABEL: image_bvh64_intersect_ray_vgpr_descr:
; GFX1013: ; %bb.0:
+; GFX1013-NEXT: v_mov_b32_e32 v20, v12
+; GFX1013-NEXT: v_mov_b32_e32 v21, v13
; GFX1013-NEXT: s_mov_b32 s1, exec_lo
; GFX1013-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
-; GFX1013-NEXT: v_readfirstlane_b32 s4, v12
-; GFX1013-NEXT: v_readfirstlane_b32 s5, v13
+; GFX1013-NEXT: v_readfirstlane_b32 s4, v20
+; GFX1013-NEXT: v_readfirstlane_b32 s5, v21
; GFX1013-NEXT: v_readfirstlane_b32 s6, v14
; GFX1013-NEXT: v_readfirstlane_b32 s7, v15
-; GFX1013-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[12:13]
+; GFX1013-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[20:21]
; GFX1013-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[14:15]
; GFX1013-NEXT: s_and_b32 s0, vcc_lo, s0
; GFX1013-NEXT: s_and_saveexec_b32 s0, s0
; GFX1013-NEXT: image_bvh64_intersect_ray v[16:19], v[0:11], s[4:7]
-; GFX1013-NEXT: ; implicit-def: $vgpr12
+; GFX1013-NEXT: ; implicit-def: $vgpr20
; GFX1013-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11
; GFX1013-NEXT: ; implicit-def: $vgpr12_vgpr13_vgpr14_vgpr15
; GFX1013-NEXT: s_waitcnt_depctr 0xffe3
@@ -450,20 +465,22 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_vgpr_descr(i64 %node_ptr
; GFX11-NEXT: v_dual_mov_b32 v19, v0 :: v_dual_mov_b32 v20, v1
; GFX11-NEXT: v_dual_mov_b32 v21, v2 :: v_dual_mov_b32 v16, v3
; GFX11-NEXT: v_dual_mov_b32 v17, v4 :: v_dual_mov_b32 v18, v5
+; GFX11-NEXT: v_dual_mov_b32 v4, v12 :: v_dual_mov_b32 v5, v13
; GFX11-NEXT: s_mov_b32 s1, exec_lo
; GFX11-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
-; GFX11-NEXT: v_readfirstlane_b32 s4, v12
-; GFX11-NEXT: v_readfirstlane_b32 s5, v13
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_readfirstlane_b32 s4, v4
+; GFX11-NEXT: v_readfirstlane_b32 s5, v5
; GFX11-NEXT: v_readfirstlane_b32 s6, v14
; GFX11-NEXT: v_readfirstlane_b32 s7, v15
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[12:13]
+; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[4:5]
; GFX11-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[14:15]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: s_and_b32 s0, vcc_lo, s0
; GFX11-NEXT: s_and_saveexec_b32 s0, s0
; GFX11-NEXT: image_bvh64_intersect_ray v[0:3], [v[19:20], v21, v[16:18], v[6:8], v[9:11]], s[4:7]
-; GFX11-NEXT: ; implicit-def: $vgpr12
+; GFX11-NEXT: ; implicit-def: $vgpr4
; GFX11-NEXT: ; implicit-def: $vgpr19_vgpr20
; GFX11-NEXT: ; implicit-def: $vgpr21
; GFX11-NEXT: ; implicit-def: $vgpr16_vgpr17_vgpr18
@@ -484,41 +501,43 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_vgpr_descr(i64 %node_ptr
define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_a16_vgpr_descr(i64 %node_ptr, float %ray_extent, <3 x float> %ray_origin, <3 x half> %ray_dir, <3 x half> %ray_inv_dir, <4 x i32> %tdescr) {
; GFX1030-LABEL: image_bvh64_intersect_ray_a16_vgpr_descr:
; GFX1030: ; %bb.0:
-; GFX1030-NEXT: v_mov_b32_e32 v14, v0
-; GFX1030-NEXT: v_mov_b32_e32 v15, v1
+; GFX1030-NEXT: v_mov_b32_e32 v19, v0
+; GFX1030-NEXT: v_mov_b32_e32 v20, v1
; GFX1030-NEXT: v_lshrrev_b32_e32 v0, 16, v6
; GFX1030-NEXT: v_and_b32_e32 v1, 0xffff, v8
-; GFX1030-NEXT: v_mov_b32_e32 v16, v2
+; GFX1030-NEXT: v_mov_b32_e32 v21, v2
; GFX1030-NEXT: v_and_b32_e32 v2, 0xffff, v9
-; GFX1030-NEXT: v_mov_b32_e32 v17, v3
+; GFX1030-NEXT: v_mov_b32_e32 v22, v3
; GFX1030-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX1030-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX1030-NEXT: v_mov_b32_e32 v18, v4
-; GFX1030-NEXT: v_mov_b32_e32 v19, v5
-; GFX1030-NEXT: v_alignbit_b32 v22, v2, v8, 16
-; GFX1030-NEXT: v_and_or_b32 v20, 0xffff, v6, v0
-; GFX1030-NEXT: v_and_or_b32 v21, 0xffff, v7, v1
+; GFX1030-NEXT: v_mov_b32_e32 v23, v4
+; GFX1030-NEXT: v_mov_b32_e32 v24, v5
+; GFX1030-NEXT: v_mov_b32_e32 v17, v10
+; GFX1030-NEXT: v_mov_b32_e32 v18, v11
+; GFX1030-NEXT: v_and_or_b32 v25, 0xffff, v6, v0
+; GFX1030-NEXT: v_and_or_b32 v26, 0xffff, v7, v1
+; GFX1030-NEXT: v_alignbit_b32 v27, v2, v8, 16
; GFX1030-NEXT: s_mov_b32 s1, exec_lo
; GFX1030-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1
-; GFX1030-NEXT: v_readfirstlane_b32 s4, v10
-; GFX1030-NEXT: v_readfirstlane_b32 s5, v11
+; GFX1030-NEXT: v_readfirstlane_b32 s4, v17
+; GFX1030-NEXT: v_readfirstlane_b32 s5, v18
; GFX1030-NEXT: v_readfirstlane_b32 s6, v12
; GFX1030-NEXT: v_readfirstlane_b32 s7, v13
-; GFX1030-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[10:11]
+; GFX1030-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[17:18]
; GFX1030-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[12:13]
; GFX1030-NEXT: s_and_b32 s0, vcc_lo, s0
; GFX1030-NEXT: s_and_saveexec_b32 s0, s0
-; GFX1030-NEXT: image_bvh64_intersect_ray v[0:3], v[14:22], s[4:7] a16
-; GFX1030-NEXT: ; implicit-def: $vgpr10
-; GFX1030-NEXT: ; implicit-def: $vgpr14
-; GFX1030-NEXT: ; implicit-def: $vgpr15
-; GFX1030-NEXT: ; implicit-def: $vgpr16
+; GFX1030-NEXT: image_bvh64_intersect_ray v[0:3], v[19:27], s[4:7] a16
; GFX1030-NEXT: ; implicit-def: $vgpr17
-; GFX1030-NEXT: ; implicit-def: $vgpr18
; GFX1030-NEXT: ; implicit-def: $vgpr19
; GFX1030-NEXT: ; implicit-def: $vgpr20
; GFX1030-NEXT: ; implicit-def: $vgpr21
; GFX1030-NEXT: ; implicit-def: $vgpr22
+; GFX1030-NEXT: ; implicit-def: $vgpr23
+; GFX1030-NEXT: ; implicit-def: $vgpr24
+; GFX1030-NEXT: ; implicit-def: $vgpr25
+; GFX1030-NEXT: ; implicit-def: $vgpr26
+; GFX1030-NEXT: ; implicit-def: $vgpr27
; GFX1030-NEXT: ; implicit-def: $vgpr10_vgpr11_vgpr12_vgpr13
; GFX1030-NEXT: s_xor_b32 exec_lo, exec_lo, s0
; GFX1030-NEXT: s_cbranch_execnz .LBB9_1
@@ -529,26 +548,28 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_a16_vgpr_descr(i64 %node
;
; GFX1013-LABEL: image_bvh64_intersect_ray_a16_vgpr_descr:
; GFX1013: ; %bb.0:
-; GFX1013-NEXT: v_lshrrev_b32_e32 v14, 16, v6
-; GFX1013-NEXT: v_and_b32_e32 v15, 0xffff, v8
+; GFX1013-NEXT: v_mov_b32_e32 v18, v10
+; GFX1013-NEXT: v_mov_b32_e32 v19, v11
+; GFX1013-NEXT: v_lshrrev_b32_e32 v10, 16, v6
+; GFX1013-NEXT: v_and_b32_e32 v11, 0xffff, v8
; GFX1013-NEXT: v_and_b32_e32 v9, 0xffff, v9
; GFX1013-NEXT: s_mov_b32 s1, exec_lo
-; GFX1013-NEXT: v_lshlrev_b32_e32 v14, 16, v14
-; GFX1013-NEXT: v_lshlrev_b32_e32 v15, 16, v15
+; GFX1013-NEXT: v_lshlrev_b32_e32 v10, 16, v10
+; GFX1013-NEXT: v_lshlrev_b32_e32 v11, 16, v11
; GFX1013-NEXT: v_alignbit_b32 v8, v9, v8, 16
-; GFX1013-NEXT: v_and_or_b32 v6, 0xffff, v6, v14
-; GFX1013-NEXT: v_and_or_b32 v7, 0xffff, v7, v15
+; GFX1013-NEXT: v_and_or_b32 v6, 0xffff, v6, v10
+; GFX1013-NEXT: v_and_or_b32 v7, 0xffff, v7, v11
; GFX1013-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1
-; GFX1013-NEXT: v_readfirstlane_b32 s4, v10
-; GFX1013-NEXT: v_readfirstlane_b32 s5, v11
+; GFX1013-NEXT: v_readfirstlane_b32 s4, v18
+; GFX1013-NEXT: v_readfirstlane_b32 s5, v19
; GFX1013-NEXT: v_readfirstlane_b32 s6, v12
; GFX1013-NEXT: v_readfirstlane_b32 s7, v13
-; GFX1013-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[10:11]
+; GFX1013-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[18:19]
; GFX1013-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[12:13]
; GFX1013-NEXT: s_and_b32 s0, vcc_lo, s0
; GFX1013-NEXT: s_and_saveexec_b32 s0, s0
; GFX1013-NEXT: image_bvh64_intersect_ray v[14:17], v[0:8], s[4:7] a16
-; GFX1013-NEXT: ; implicit-def: $vgpr10
+; GFX1013-NEXT: ; implicit-def: $vgpr18
; GFX1013-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8
; GFX1013-NEXT: ; implicit-def: $vgpr10_vgpr11_vgpr12_vgpr13
; GFX1013-NEXT: s_waitcnt_depctr 0xffe3
@@ -570,28 +591,28 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_a16_vgpr_descr(i64 %node
; GFX11-NEXT: v_and_b32_e32 v1, 0xffff, v9
; GFX11-NEXT: v_dual_mov_b32 v19, v2 :: v_dual_mov_b32 v14, v3
; GFX11-NEXT: v_dual_mov_b32 v15, v4 :: v_dual_mov_b32 v16, v5
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
-; GFX11-NEXT: v_lshl_or_b32 v4, v6, 16, v0
-; GFX11-NEXT: v_perm_b32 v5, v6, v8, 0x7060302
-; GFX11-NEXT: v_lshl_or_b32 v6, v7, 16, v1
+; GFX11-NEXT: v_dual_mov_b32 v4, v10 :: v_dual_mov_b32 v5, v11
+; GFX11-NEXT: v_lshl_or_b32 v20, v6, 16, v0
+; GFX11-NEXT: v_perm_b32 v21, v6, v8, 0x7060302
+; GFX11-NEXT: v_lshl_or_b32 v22, v7, 16, v1
; GFX11-NEXT: s_mov_b32 s1, exec_lo
; GFX11-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1
-; GFX11-NEXT: v_readfirstlane_b32 s4, v10
-; GFX11-NEXT: v_readfirstlane_b32 s5, v11
+; GFX11-NEXT: v_readfirstlane_b32 s4, v4
+; GFX11-NEXT: v_readfirstlane_b32 s5, v5
; GFX11-NEXT: v_readfirstlane_b32 s6, v12
; GFX11-NEXT: v_readfirstlane_b32 s7, v13
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[10:11]
+; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[4:5]
; GFX11-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[12:13]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: s_and_b32 s0, vcc_lo, s0
; GFX11-NEXT: s_and_saveexec_b32 s0, s0
-; GFX11-NEXT: image_bvh64_intersect_ray v[0:3], [v[17:18], v19, v[14:16], v[4:6]], s[4:7] a16
-; GFX11-NEXT: ; implicit-def: $vgpr10
+; GFX11-NEXT: image_bvh64_intersect_ray v[0:3], [v[17:18], v19, v[14:16], v[20:22]], s[4:7] a16
+; GFX11-NEXT: ; implicit-def: $vgpr4
; GFX11-NEXT: ; implicit-def: $vgpr17_vgpr18
; GFX11-NEXT: ; implicit-def: $vgpr19
; GFX11-NEXT: ; implicit-def: $vgpr14_vgpr15_vgpr16
-; GFX11-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6
+; GFX11-NEXT: ; implicit-def: $vgpr20_vgpr21_vgpr22
; GFX11-NEXT: ; implicit-def: $vgpr10_vgpr11_vgpr12_vgpr13
; GFX11-NEXT: s_xor_b32 exec_lo, exec_lo, s0
; GFX11-NEXT: s_cbranch_execnz .LBB9_1
diff --git a/llvm/test/CodeGen/AMDGPU/live-interval-bug-in-rename-independent-subregs.mir b/llvm/test/CodeGen/AMDGPU/live-interval-bug-in-rename-independent-subregs.mir
index f990217aeae074..98b1b69101e51d 100644
--- a/llvm/test/CodeGen/AMDGPU/live-interval-bug-in-rename-independent-subregs.mir
+++ b/llvm/test/CodeGen/AMDGPU/live-interval-bug-in-rename-independent-subregs.mir
@@ -23,14 +23,14 @@ body: |
; REG_ALLOC-NEXT: renamable $sgpr6_sgpr7 = V_CMP_NE_U32_e64 killed $vgpr1, 0, implicit $exec
; REG_ALLOC-NEXT: S_CMP_EQ_U64 killed renamable $sgpr12_sgpr13, killed renamable $sgpr2_sgpr3, implicit-def $scc
; REG_ALLOC-NEXT: renamable $sgpr1 = S_CSELECT_B32 1, 0, implicit $scc
- ; REG_ALLOC-NEXT: renamable $vgpr0 = IMPLICIT_DEF
+ ; REG_ALLOC-NEXT: renamable $vgpr8 = IMPLICIT_DEF
; REG_ALLOC-NEXT: $exec = S_MOV_B64_term renamable $sgpr6_sgpr7
; REG_ALLOC-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec
; REG_ALLOC-NEXT: S_BRANCH %bb.2
; REG_ALLOC-NEXT: {{ $}}
; REG_ALLOC-NEXT: bb.1:
; REG_ALLOC-NEXT: successors: %bb.3(0x40000000), %bb.5(0x40000000)
- ; REG_ALLOC-NEXT: liveins: $sgpr0, $vgpr0, $vgpr10, $sgpr4_sgpr5, $sgpr6_sgpr7, $vgpr11_vgpr12_vgpr13_vgpr14:0x0000000000000003, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7:0x0000000000000300
+ ; REG_ALLOC-NEXT: liveins: $sgpr0, $vgpr8, $vgpr10, $sgpr4_sgpr5, $sgpr6_sgpr7, $vgpr11_vgpr12_vgpr13_vgpr14:0x0000000000000003, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7:0x0000000000000300
; REG_ALLOC-NEXT: {{ $}}
; REG_ALLOC-NEXT: renamable $sgpr2_sgpr3 = S_OR_SAVEEXEC_B64 killed renamable $sgpr6_sgpr7, implicit-def $exec, implicit-def $scc, implicit $exec
; REG_ALLOC-NEXT: $exec = S_XOR_B64_term $exec, renamable $sgpr2_sgpr3, implicit-def $scc
@@ -42,7 +42,7 @@ body: |
; REG_ALLOC-NEXT: liveins: $sgpr0, $sgpr1, $vgpr10, $sgpr4_sgpr5, $sgpr6_sgpr7
; REG_ALLOC-NEXT: {{ $}}
; REG_ALLOC-NEXT: renamable $sgpr1 = S_OR_B32 killed renamable $sgpr1, 2, implicit-def dead $scc
- ; REG_ALLOC-NEXT: renamable $vgpr0 = COPY killed renamable $sgpr1
+ ; REG_ALLOC-NEXT: renamable $vgpr8 = COPY killed renamable $sgpr1
; REG_ALLOC-NEXT: renamable $vgpr11_vgpr12 = IMPLICIT_DEF
; REG_ALLOC-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF
; REG_ALLOC-NEXT: S_BRANCH %bb.1
@@ -55,20 +55,20 @@ body: |
; REG_ALLOC-NEXT: renamable $sgpr6 = V_READFIRSTLANE_B32 killed $vgpr4, implicit $exec
; REG_ALLOC-NEXT: S_CMP_EQ_U32 killed renamable $sgpr6, killed renamable $sgpr1, implicit-def $scc
; REG_ALLOC-NEXT: renamable $sgpr1 = S_CSELECT_B32 1, 0, implicit $scc
- ; REG_ALLOC-NEXT: renamable $vgpr0 = COPY killed renamable $sgpr1
+ ; REG_ALLOC-NEXT: renamable $vgpr8 = COPY killed renamable $sgpr1
; REG_ALLOC-NEXT: S_BRANCH %bb.5
; REG_ALLOC-NEXT: {{ $}}
; REG_ALLOC-NEXT: bb.4:
- ; REG_ALLOC-NEXT: liveins: $sgpr0, $vgpr0, $vgpr10, $sgpr4_sgpr5
+ ; REG_ALLOC-NEXT: liveins: $sgpr0, $vgpr8, $vgpr10, $sgpr4_sgpr5
; REG_ALLOC-NEXT: {{ $}}
; REG_ALLOC-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (invariant load (<4 x s32>), addrspace 4)
- ; REG_ALLOC-NEXT: renamable $vgpr1 = V_ADD_U32_e64 killed $sgpr0, killed $vgpr10, 0, implicit $exec
- ; REG_ALLOC-NEXT: BUFFER_STORE_DWORD_OFFEN_exact killed renamable $vgpr0, killed renamable $vgpr1, killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
+ ; REG_ALLOC-NEXT: renamable $vgpr0 = V_ADD_U32_e64 killed $sgpr0, killed $vgpr10, 0, implicit $exec
+ ; REG_ALLOC-NEXT: BUFFER_STORE_DWORD_OFFEN_exact killed renamable $vgpr8, killed renamable $vgpr0, killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
; REG_ALLOC-NEXT: S_ENDPGM 0
; REG_ALLOC-NEXT: {{ $}}
; REG_ALLOC-NEXT: bb.5:
; REG_ALLOC-NEXT: successors: %bb.4(0x80000000)
- ; REG_ALLOC-NEXT: liveins: $sgpr0, $vgpr0, $vgpr10, $sgpr2_sgpr3, $sgpr4_sgpr5
+ ; REG_ALLOC-NEXT: liveins: $sgpr0, $vgpr8, $vgpr10, $sgpr2_sgpr3, $sgpr4_sgpr5
; REG_ALLOC-NEXT: {{ $}}
; REG_ALLOC-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr2_sgpr3, implicit-def $scc
; REG_ALLOC-NEXT: S_BRANCH %bb.4
@@ -90,14 +90,14 @@ body: |
; DEAD_INST_DEL-NEXT: renamable $sgpr6_sgpr7 = V_CMP_NE_U32_e64 killed $vgpr1, 0, implicit $exec
; DEAD_INST_DEL-NEXT: S_CMP_EQ_U64 killed renamable $sgpr12_sgpr13, killed renamable $sgpr2_sgpr3, implicit-def $scc
; DEAD_INST_DEL-NEXT: renamable $sgpr1 = S_CSELECT_B32 1, 0, implicit $scc
- ; DEAD_INST_DEL-NEXT: renamable $vgpr0 = IMPLICIT_DEF
+ ; DEAD_INST_DEL-NEXT: renamable $vgpr8 = IMPLICIT_DEF
; DEAD_INST_DEL-NEXT: $exec = S_MOV_B64_term renamable $sgpr6_sgpr7
; DEAD_INST_DEL-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec
; DEAD_INST_DEL-NEXT: S_BRANCH %bb.2
; DEAD_INST_DEL-NEXT: {{ $}}
; DEAD_INST_DEL-NEXT: bb.1:
; DEAD_INST_DEL-NEXT: successors: %bb.3(0x40000000), %bb.5(0x40000000)
- ; DEAD_INST_DEL-NEXT: liveins: $sgpr0, $vgpr0, $vgpr10, $sgpr4_sgpr5, $sgpr6_sgpr7, $vgpr11_vgpr12_vgpr13_vgpr14:0x0000000000000003, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7:0x0000000000000300
+ ; DEAD_INST_DEL-NEXT: liveins: $sgpr0, $vgpr8, $vgpr10, $sgpr4_sgpr5, $sgpr6_sgpr7, $vgpr11_vgpr12_vgpr13_vgpr14:0x0000000000000003, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7:0x0000000000000300
; DEAD_INST_DEL-NEXT: {{ $}}
; DEAD_INST_DEL-NEXT: renamable $sgpr2_sgpr3 = S_OR_SAVEEXEC_B64 killed renamable $sgpr6_sgpr7, implicit-def $exec, implicit-def $scc, implicit $exec
; DEAD_INST_DEL-NEXT: $exec = S_XOR_B64_term $exec, renamable $sgpr2_sgpr3, implicit-def $scc
@@ -109,6 +109,7 @@ body: |
; DEAD_INST_DEL-NEXT: liveins: $sgpr0, $sgpr1, $vgpr10, $sgpr4_sgpr5, $sgpr6_sgpr7
; DEAD_INST_DEL-NEXT: {{ $}}
; DEAD_INST_DEL-NEXT: renamable $sgpr1 = S_OR_B32 killed renamable $sgpr1, 2, implicit-def dead $scc
+ ; DEAD_INST_DEL-NEXT: renamable $vgpr8 = COPY killed renamable $sgpr1
; DEAD_INST_DEL-NEXT: renamable $vgpr11_vgpr12 = IMPLICIT_DEF
; DEAD_INST_DEL-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF
; DEAD_INST_DEL-NEXT: S_BRANCH %bb.1
@@ -121,20 +122,20 @@ body: |
; DEAD_INST_DEL-NEXT: renamable $sgpr6 = V_READFIRSTLANE_B32 killed $vgpr4, implicit $exec
; DEAD_INST_DEL-NEXT: S_CMP_EQ_U32 killed renamable $sgpr6, killed renamable $sgpr1, implicit-def $scc
; DEAD_INST_DEL-NEXT: renamable $sgpr1 = S_CSELECT_B32 1, 0, implicit $scc
- ; DEAD_INST_DEL-NEXT: renamable $vgpr0 = COPY killed renamable $sgpr1
+ ; DEAD_INST_DEL-NEXT: renamable $vgpr8 = COPY killed renamable $sgpr1
; DEAD_INST_DEL-NEXT: S_BRANCH %bb.5
; DEAD_INST_DEL-NEXT: {{ $}}
; DEAD_INST_DEL-NEXT: bb.4:
- ; DEAD_INST_DEL-NEXT: liveins: $sgpr0, $vgpr0, $vgpr10, $sgpr4_sgpr5
+ ; DEAD_INST_DEL-NEXT: liveins: $sgpr0, $vgpr8, $vgpr10, $sgpr4_sgpr5
; DEAD_INST_DEL-NEXT: {{ $}}
; DEAD_INST_DEL-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (invariant load (<4 x s32>), addrspace 4)
- ; DEAD_INST_DEL-NEXT: renamable $vgpr1 = V_ADD_U32_e64 killed $sgpr0, killed $vgpr10, 0, implicit $exec
- ; DEAD_INST_DEL-NEXT: BUFFER_STORE_DWORD_OFFEN_exact killed renamable $vgpr0, killed renamable $vgpr1, killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
+ ; DEAD_INST_DEL-NEXT: renamable $vgpr0 = V_ADD_U32_e64 killed $sgpr0, killed $vgpr10, 0, implicit $exec
+ ; DEAD_INST_DEL-NEXT: BUFFER_STORE_DWORD_OFFEN_exact killed renamable $vgpr8, killed renamable $vgpr0, killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
; DEAD_INST_DEL-NEXT: S_ENDPGM 0
; DEAD_INST_DEL-NEXT: {{ $}}
; DEAD_INST_DEL-NEXT: bb.5:
; DEAD_INST_DEL-NEXT: successors: %bb.4(0x80000000)
- ; DEAD_INST_DEL-NEXT: liveins: $sgpr0, $vgpr0, $vgpr10, $sgpr2_sgpr3, $sgpr4_sgpr5
+ ; DEAD_INST_DEL-NEXT: liveins: $sgpr0, $vgpr8, $vgpr10, $sgpr2_sgpr3, $sgpr4_sgpr5
; DEAD_INST_DEL-NEXT: {{ $}}
; DEAD_INST_DEL-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr2_sgpr3, implicit-def $scc
; DEAD_INST_DEL-NEXT: S_BRANCH %bb.4
More information about the llvm-commits
mailing list