[llvm] a274ffe - [MachineSink] Remove subrange of live-ins from super register as well. (#159145)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 25 22:45:30 PDT 2025
Author: Pete Chou
Date: 2025-09-26T14:45:26+09:00
New Revision: a274ffe2597ca53d828d08d44af0f44d3b823507
URL: https://github.com/llvm/llvm-project/commit/a274ffe2597ca53d828d08d44af0f44d3b823507
DIFF: https://github.com/llvm/llvm-project/commit/a274ffe2597ca53d828d08d44af0f44d3b823507.diff
LOG: [MachineSink] Remove subrange of live-ins from super register as well. (#159145)
Post-RA machine sinking could sink a copy of sub-register into
a successor. However, the sub-register might not be removed from the
live-in bitmask of its super register in successor and then a later
pass, e.g, if-converter, may add an implicit use of the register from
live-in resulting in an use of an undefined register. This change makes
sure subrange of live-ins from super register could be removed as well.
Added:
llvm/test/CodeGen/AMDGPU/postra-machine-sink-livein-subrange.mir
Modified:
llvm/include/llvm/CodeGen/MachineBasicBlock.h
llvm/lib/CodeGen/MachineBasicBlock.cpp
llvm/lib/CodeGen/MachineSink.cpp
llvm/test/CodeGen/AMDGPU/av_spill_cross_bb_usage.mir
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
index 80422db61c045..6d026796e93b7 100644
--- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -505,6 +505,11 @@ class MachineBasicBlock
LLVM_ABI void removeLiveIn(MCRegister Reg,
LaneBitmask LaneMask = LaneBitmask::getAll());
+ /// Remove the specified register from any overlapped live in. The method is
+ /// subreg-aware and removes Reg and its subregs from the live in set. It also
+ /// clears the corresponding bitmask from its live-in super registers.
+ LLVM_ABI void removeLiveInOverlappedWith(MCRegister Reg);
+
/// Return true if the specified register is in the live in set.
LLVM_ABI bool isLiveIn(MCRegister Reg,
LaneBitmask LaneMask = LaneBitmask::getAll()) const;
diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp
index d4b64ab9db6de..bc1df26db2684 100644
--- a/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -606,6 +606,26 @@ void MachineBasicBlock::removeLiveIn(MCRegister Reg, LaneBitmask LaneMask) {
LiveIns.erase(I);
}
+void MachineBasicBlock::removeLiveInOverlappedWith(MCRegister Reg) {
+ const MachineFunction *MF = getParent();
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
+ // Remove Reg and its subregs from live in set.
+ for (MCPhysReg S : TRI->subregs_inclusive(Reg))
+ removeLiveIn(S);
+
+ // Remove live-in bitmask in super registers as well.
+ for (MCPhysReg Super : TRI->superregs(Reg)) {
+ for (MCSubRegIndexIterator SRI(Super, TRI); SRI.isValid(); ++SRI) {
+ if (Reg == SRI.getSubReg()) {
+ unsigned SubRegIndex = SRI.getSubRegIndex();
+ LaneBitmask SubRegLaneMask = TRI->getSubRegIndexLaneMask(SubRegIndex);
+ removeLiveIn(Super, SubRegLaneMask);
+ break;
+ }
+ }
+ }
+}
+
MachineBasicBlock::livein_iterator
MachineBasicBlock::removeLiveIn(MachineBasicBlock::livein_iterator I) {
// Get non-const version of iterator.
diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp
index 9ec5151a039b7..d5153b7fb6207 100644
--- a/llvm/lib/CodeGen/MachineSink.cpp
+++ b/llvm/lib/CodeGen/MachineSink.cpp
@@ -2187,11 +2187,9 @@ static void clearKillFlags(MachineInstr *MI, MachineBasicBlock &CurBB,
static void updateLiveIn(MachineInstr *MI, MachineBasicBlock *SuccBB,
const SmallVectorImpl<unsigned> &UsedOpsInCopy,
const SmallVectorImpl<Register> &DefedRegsInCopy) {
- MachineFunction &MF = *SuccBB->getParent();
- const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
for (Register DefReg : DefedRegsInCopy)
- for (MCPhysReg S : TRI->subregs_inclusive(DefReg))
- SuccBB->removeLiveIn(S);
+ SuccBB->removeLiveInOverlappedWith(DefReg);
+
for (auto U : UsedOpsInCopy)
SuccBB->addLiveIn(MI->getOperand(U).getReg());
SuccBB->sortUniqueLiveIns();
diff --git a/llvm/test/CodeGen/AMDGPU/av_spill_cross_bb_usage.mir b/llvm/test/CodeGen/AMDGPU/av_spill_cross_bb_usage.mir
index c456f9c4b16e5..a2ec87053a8d5 100644
--- a/llvm/test/CodeGen/AMDGPU/av_spill_cross_bb_usage.mir
+++ b/llvm/test/CodeGen/AMDGPU/av_spill_cross_bb_usage.mir
@@ -49,7 +49,7 @@ body: |
; GCN-NEXT: {{ $}}
; GCN-NEXT: bb.1:
; GCN-NEXT: successors: %bb.2(0x80000000)
- ; GCN-NEXT: liveins: $exec, $sgpr30, $sgpr31, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr40, $sgpr30_sgpr31, $vgpr10_vgpr11:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000F, $vgpr41_vgpr42:0x000000000000000F, $vgpr43_vgpr44:0x000000000000000F, $vgpr45_vgpr46:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F
+ ; GCN-NEXT: liveins: $exec, $sgpr30, $sgpr31, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr40, $sgpr30_sgpr31, $vgpr10_vgpr11:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000F, $vgpr43_vgpr44:0x000000000000000F
; GCN-NEXT: {{ $}}
; GCN-NEXT: renamable $vgpr57 = COPY $vgpr9, implicit $exec
; GCN-NEXT: renamable $vgpr56 = COPY $vgpr8, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/postra-machine-sink-livein-subrange.mir b/llvm/test/CodeGen/AMDGPU/postra-machine-sink-livein-subrange.mir
new file mode 100644
index 0000000000000..eb48ff08f1b7c
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/postra-machine-sink-livein-subrange.mir
@@ -0,0 +1,113 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -run-pass=postra-machine-sink -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GCN %s
+
+# Test live-in with subrange is updated accordingly in postra-machine-sink.
+---
+name: test_postra_machine_sink_livein_update
+tracksRegLiveness: true
+frameInfo:
+ adjustsStack: true
+stack:
+ - { id: 0, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4,
+ stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+ - { id: 1, name: '', type: spill-slot, offset: 0, size: 8, alignment: 4,
+ stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+ - { id: 2, name: '', type: spill-slot, offset: 0, size: 8, alignment: 4,
+ stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ ; GCN-LABEL: name: test_postra_machine_sink_livein_update
+ ; GCN: bb.0:
+ ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; GCN-NEXT: liveins: $sgpr30, $sgpr31, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr40, $sgpr30_sgpr31
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: renamable $vgpr44 = COPY $vgpr13, implicit $exec
+ ; GCN-NEXT: renamable $vgpr43 = COPY $vgpr12, implicit $exec
+ ; GCN-NEXT: S_CBRANCH_SCC1 %bb.2, implicit undef $scc
+ ; GCN-NEXT: S_BRANCH %bb.1
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.1:
+ ; GCN-NEXT: successors: %bb.2(0x80000000)
+ ; GCN-NEXT: liveins: $exec, $sgpr30, $sgpr31, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr40, $sgpr30_sgpr31, $vgpr10_vgpr11:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000F, $vgpr43_vgpr44:0x000000000000000F
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: renamable $vgpr57 = COPY $vgpr9, implicit $exec
+ ; GCN-NEXT: renamable $vgpr56 = COPY $vgpr8, implicit $exec
+ ; GCN-NEXT: renamable $vgpr59 = COPY $vgpr7, implicit $exec
+ ; GCN-NEXT: renamable $vgpr58 = COPY $vgpr6, implicit $exec
+ ; GCN-NEXT: renamable $vgpr61 = COPY $vgpr5, implicit $exec
+ ; GCN-NEXT: renamable $vgpr60 = COPY $vgpr4, implicit $exec
+ ; GCN-NEXT: renamable $vgpr42 = COPY $vgpr3, implicit $exec
+ ; GCN-NEXT: renamable $vgpr41 = COPY $vgpr2, implicit $exec
+ ; GCN-NEXT: renamable $vgpr46 = COPY $vgpr1, implicit $exec
+ ; GCN-NEXT: renamable $vgpr45 = COPY $vgpr0, implicit $exec
+ ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+ ; GCN-NEXT: renamable $sgpr16_sgpr17 = IMPLICIT_DEF
+ ; GCN-NEXT: $vgpr40 = SI_SPILL_S32_TO_VGPR $sgpr30, 0, $vgpr40, implicit-def $sgpr30_sgpr31, implicit $sgpr30_sgpr31
+ ; GCN-NEXT: $vgpr40 = SI_SPILL_S32_TO_VGPR $sgpr31, 1, $vgpr40, implicit $sgpr30_sgpr31
+ ; GCN-NEXT: SI_SPILL_AV64_SAVE killed $vgpr14_vgpr15, %stack.1, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.1, align 4, addrspace 5)
+ ; GCN-NEXT: SI_SPILL_AV64_SAVE killed $vgpr10_vgpr11, %stack.2, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.2, align 4, addrspace 5)
+ ; GCN-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr16_sgpr17, 0, csr_amdgpu, implicit-def dead $vgpr0
+ ; GCN-NEXT: renamable $vgpr14_vgpr15 = SI_SPILL_AV64_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.1, align 4, addrspace 5)
+ ; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_FMA_F64_e64 0, killed $vgpr45_vgpr46, 0, killed $vgpr41_vgpr42, 0, killed $vgpr60_vgpr61, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+ ; GCN-NEXT: FLAT_STORE_DWORDX2 killed renamable $vgpr58_vgpr59, killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64))
+ ; GCN-NEXT: renamable $vgpr0_vgpr1 = SI_SPILL_AV64_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.2, align 4, addrspace 5)
+ ; GCN-NEXT: FLAT_STORE_DWORDX2 killed renamable $vgpr0_vgpr1, killed renamable $vgpr56_vgpr57, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64))
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.2:
+ ; GCN-NEXT: liveins: $vgpr40, $vgpr14_vgpr15:0x000000000000000F, $vgpr43_vgpr44:0x000000000000000F
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: renamable $vgpr0_vgpr1 = V_MOV_B64_PSEUDO 0, implicit $exec
+ ; GCN-NEXT: FLAT_STORE_DWORDX2 undef renamable $vgpr0_vgpr1, killed renamable $vgpr43_vgpr44, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64))
+ ; GCN-NEXT: FLAT_STORE_DWORDX2 killed renamable $vgpr0_vgpr1, killed renamable $vgpr14_vgpr15, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64))
+ ; GCN-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31
+ bb.0:
+ successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ liveins: $sgpr30, $sgpr31, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr40, $sgpr30_sgpr31
+
+ renamable $vgpr44 = COPY $vgpr13, implicit $exec
+ renamable $vgpr43 = COPY $vgpr12, implicit $exec
+ renamable $vgpr57 = COPY $vgpr9, implicit $exec
+ renamable $vgpr56 = COPY $vgpr8, implicit $exec
+ renamable $vgpr59 = COPY $vgpr7, implicit $exec
+ renamable $vgpr58 = COPY $vgpr6, implicit $exec
+ renamable $vgpr61 = COPY $vgpr5, implicit $exec
+ renamable $vgpr60 = COPY $vgpr4, implicit $exec
+ renamable $vgpr42 = COPY $vgpr3, implicit $exec
+ renamable $vgpr41 = COPY $vgpr2, implicit $exec
+ renamable $vgpr46 = COPY $vgpr1, implicit $exec
+ renamable $vgpr45 = COPY $vgpr0, implicit $exec
+ S_CBRANCH_SCC1 %bb.2, implicit undef $scc
+ S_BRANCH %bb.1
+
+ bb.1:
+ successors: %bb.2(0x80000000)
+ liveins: $sgpr30, $sgpr31, $vgpr40, $sgpr30_sgpr31, $vgpr10_vgpr11:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000F, $vgpr41_vgpr42:0x000000000000000F, $vgpr43_vgpr44:0x000000000000000F, $vgpr45_vgpr46:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F
+
+ ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+ renamable $sgpr16_sgpr17 = IMPLICIT_DEF
+ $vgpr40 = SI_SPILL_S32_TO_VGPR $sgpr30, 0, $vgpr40, implicit-def $sgpr30_sgpr31, implicit $sgpr30_sgpr31
+ $vgpr40 = SI_SPILL_S32_TO_VGPR $sgpr31, 1, $vgpr40, implicit $sgpr30_sgpr31
+ SI_SPILL_AV64_SAVE killed $vgpr14_vgpr15, %stack.1, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.1, align 4, addrspace 5)
+ SI_SPILL_AV64_SAVE killed $vgpr10_vgpr11, %stack.2, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.2, align 4, addrspace 5)
+ dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr16_sgpr17, 0, csr_amdgpu, implicit-def dead $vgpr0
+ renamable $vgpr14_vgpr15 = SI_SPILL_AV64_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.1, align 4, addrspace 5)
+ renamable $vgpr0_vgpr1 = nofpexcept V_FMA_F64_e64 0, killed $vgpr45_vgpr46, 0, killed $vgpr41_vgpr42, 0, killed $vgpr60_vgpr61, 0, 0, implicit $mode, implicit $exec
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+ FLAT_STORE_DWORDX2 killed renamable $vgpr58_vgpr59, killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64))
+ renamable $vgpr0_vgpr1 = SI_SPILL_AV64_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.2, align 4, addrspace 5)
+ FLAT_STORE_DWORDX2 killed renamable $vgpr0_vgpr1, killed renamable $vgpr56_vgpr57, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64))
+
+ bb.2:
+ liveins: $vgpr40, $vgpr14_vgpr15:0x000000000000000F, $vgpr43_vgpr44:0x000000000000000F
+
+ renamable $vgpr0_vgpr1 = V_MOV_B64_PSEUDO 0, implicit $exec
+ FLAT_STORE_DWORDX2 undef renamable $vgpr0_vgpr1, killed renamable $vgpr43_vgpr44, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64))
+ FLAT_STORE_DWORDX2 killed renamable $vgpr0_vgpr1, killed renamable $vgpr14_vgpr15, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64))
+ S_SETPC_B64_return undef $sgpr30_sgpr31
+...
More information about the llvm-commits
mailing list