[llvm] a274ffe - [MachineSink] Remove subrange of live-ins from super register as well. (#159145)

via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 25 22:45:30 PDT 2025


Author: Pete Chou
Date: 2025-09-26T14:45:26+09:00
New Revision: a274ffe2597ca53d828d08d44af0f44d3b823507

URL: https://github.com/llvm/llvm-project/commit/a274ffe2597ca53d828d08d44af0f44d3b823507
DIFF: https://github.com/llvm/llvm-project/commit/a274ffe2597ca53d828d08d44af0f44d3b823507.diff

LOG: [MachineSink] Remove subrange of live-ins from super register as well. (#159145)

Post-RA machine sinking could sink a copy of sub-register into
a successor. However, the sub-register might not be removed from the
live-in bitmask of its super register in successor and then a later
pass, e.g, if-converter, may add an implicit use of the register from
live-in resulting in an use of an undefined register. This change makes
sure subrange of live-ins from super register could be removed as well.

Added: 
    llvm/test/CodeGen/AMDGPU/postra-machine-sink-livein-subrange.mir

Modified: 
    llvm/include/llvm/CodeGen/MachineBasicBlock.h
    llvm/lib/CodeGen/MachineBasicBlock.cpp
    llvm/lib/CodeGen/MachineSink.cpp
    llvm/test/CodeGen/AMDGPU/av_spill_cross_bb_usage.mir

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
index 80422db61c045..6d026796e93b7 100644
--- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -505,6 +505,11 @@ class MachineBasicBlock
   LLVM_ABI void removeLiveIn(MCRegister Reg,
                              LaneBitmask LaneMask = LaneBitmask::getAll());
 
+  /// Remove the specified register from any overlapped live in. The method is
+  /// subreg-aware and removes Reg and its subregs from the live in set. It also
+  /// clears the corresponding bitmask from its live-in super registers.
+  LLVM_ABI void removeLiveInOverlappedWith(MCRegister Reg);
+
   /// Return true if the specified register is in the live in set.
   LLVM_ABI bool isLiveIn(MCRegister Reg,
                          LaneBitmask LaneMask = LaneBitmask::getAll()) const;

diff  --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp
index d4b64ab9db6de..bc1df26db2684 100644
--- a/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -606,6 +606,26 @@ void MachineBasicBlock::removeLiveIn(MCRegister Reg, LaneBitmask LaneMask) {
     LiveIns.erase(I);
 }
 
+void MachineBasicBlock::removeLiveInOverlappedWith(MCRegister Reg) {
+  const MachineFunction *MF = getParent();
+  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
+  // Remove Reg and its subregs from live in set.
+  for (MCPhysReg S : TRI->subregs_inclusive(Reg))
+    removeLiveIn(S);
+
+  // Remove live-in bitmask in super registers as well.
+  for (MCPhysReg Super : TRI->superregs(Reg)) {
+    for (MCSubRegIndexIterator SRI(Super, TRI); SRI.isValid(); ++SRI) {
+      if (Reg == SRI.getSubReg()) {
+        unsigned SubRegIndex = SRI.getSubRegIndex();
+        LaneBitmask SubRegLaneMask = TRI->getSubRegIndexLaneMask(SubRegIndex);
+        removeLiveIn(Super, SubRegLaneMask);
+        break;
+      }
+    }
+  }
+}
+
 MachineBasicBlock::livein_iterator
 MachineBasicBlock::removeLiveIn(MachineBasicBlock::livein_iterator I) {
   // Get non-const version of iterator.

diff  --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp
index 9ec5151a039b7..d5153b7fb6207 100644
--- a/llvm/lib/CodeGen/MachineSink.cpp
+++ b/llvm/lib/CodeGen/MachineSink.cpp
@@ -2187,11 +2187,9 @@ static void clearKillFlags(MachineInstr *MI, MachineBasicBlock &CurBB,
 static void updateLiveIn(MachineInstr *MI, MachineBasicBlock *SuccBB,
                          const SmallVectorImpl<unsigned> &UsedOpsInCopy,
                          const SmallVectorImpl<Register> &DefedRegsInCopy) {
-  MachineFunction &MF = *SuccBB->getParent();
-  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
   for (Register DefReg : DefedRegsInCopy)
-    for (MCPhysReg S : TRI->subregs_inclusive(DefReg))
-      SuccBB->removeLiveIn(S);
+    SuccBB->removeLiveInOverlappedWith(DefReg);
+
   for (auto U : UsedOpsInCopy)
     SuccBB->addLiveIn(MI->getOperand(U).getReg());
   SuccBB->sortUniqueLiveIns();

diff  --git a/llvm/test/CodeGen/AMDGPU/av_spill_cross_bb_usage.mir b/llvm/test/CodeGen/AMDGPU/av_spill_cross_bb_usage.mir
index c456f9c4b16e5..a2ec87053a8d5 100644
--- a/llvm/test/CodeGen/AMDGPU/av_spill_cross_bb_usage.mir
+++ b/llvm/test/CodeGen/AMDGPU/av_spill_cross_bb_usage.mir
@@ -49,7 +49,7 @@ body:             |
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT: bb.1:
   ; GCN-NEXT:   successors: %bb.2(0x80000000)
-  ; GCN-NEXT:   liveins: $exec, $sgpr30, $sgpr31, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr40, $sgpr30_sgpr31, $vgpr10_vgpr11:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000F, $vgpr41_vgpr42:0x000000000000000F, $vgpr43_vgpr44:0x000000000000000F, $vgpr45_vgpr46:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F
+  ; GCN-NEXT:   liveins: $exec, $sgpr30, $sgpr31, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr40, $sgpr30_sgpr31, $vgpr10_vgpr11:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000F, $vgpr43_vgpr44:0x000000000000000F
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT:   renamable $vgpr57 = COPY $vgpr9, implicit $exec
   ; GCN-NEXT:   renamable $vgpr56 = COPY $vgpr8, implicit $exec

diff  --git a/llvm/test/CodeGen/AMDGPU/postra-machine-sink-livein-subrange.mir b/llvm/test/CodeGen/AMDGPU/postra-machine-sink-livein-subrange.mir
new file mode 100644
index 0000000000000..eb48ff08f1b7c
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/postra-machine-sink-livein-subrange.mir
@@ -0,0 +1,113 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -run-pass=postra-machine-sink -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GCN %s
+
+# Test live-in with subrange is updated accordingly in postra-machine-sink.
+---
+name:            test_postra_machine_sink_livein_update
+tracksRegLiveness: true
+frameInfo:
+  adjustsStack:    true
+stack:
+  - { id: 0, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4,
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 1, name: '', type: spill-slot, offset: 0, size: 8, alignment: 4,
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 2, name: '', type: spill-slot, offset: 0, size: 8, alignment: 4,
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+machineFunctionInfo:
+  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
+  stackPtrOffsetReg: '$sgpr32'
+body:             |
+  ; GCN-LABEL: name: test_postra_machine_sink_livein_update
+  ; GCN: bb.0:
+  ; GCN-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; GCN-NEXT:   liveins: $sgpr30, $sgpr31, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr40, $sgpr30_sgpr31
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   renamable $vgpr44 = COPY $vgpr13, implicit $exec
+  ; GCN-NEXT:   renamable $vgpr43 = COPY $vgpr12, implicit $exec
+  ; GCN-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit undef $scc
+  ; GCN-NEXT:   S_BRANCH %bb.1
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.1:
+  ; GCN-NEXT:   successors: %bb.2(0x80000000)
+  ; GCN-NEXT:   liveins: $exec, $sgpr30, $sgpr31, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr40, $sgpr30_sgpr31, $vgpr10_vgpr11:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000F, $vgpr43_vgpr44:0x000000000000000F
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   renamable $vgpr57 = COPY $vgpr9, implicit $exec
+  ; GCN-NEXT:   renamable $vgpr56 = COPY $vgpr8, implicit $exec
+  ; GCN-NEXT:   renamable $vgpr59 = COPY $vgpr7, implicit $exec
+  ; GCN-NEXT:   renamable $vgpr58 = COPY $vgpr6, implicit $exec
+  ; GCN-NEXT:   renamable $vgpr61 = COPY $vgpr5, implicit $exec
+  ; GCN-NEXT:   renamable $vgpr60 = COPY $vgpr4, implicit $exec
+  ; GCN-NEXT:   renamable $vgpr42 = COPY $vgpr3, implicit $exec
+  ; GCN-NEXT:   renamable $vgpr41 = COPY $vgpr2, implicit $exec
+  ; GCN-NEXT:   renamable $vgpr46 = COPY $vgpr1, implicit $exec
+  ; GCN-NEXT:   renamable $vgpr45 = COPY $vgpr0, implicit $exec
+  ; GCN-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+  ; GCN-NEXT:   renamable $sgpr16_sgpr17 = IMPLICIT_DEF
+  ; GCN-NEXT:   $vgpr40 = SI_SPILL_S32_TO_VGPR $sgpr30, 0, $vgpr40, implicit-def $sgpr30_sgpr31, implicit $sgpr30_sgpr31
+  ; GCN-NEXT:   $vgpr40 = SI_SPILL_S32_TO_VGPR $sgpr31, 1, $vgpr40, implicit $sgpr30_sgpr31
+  ; GCN-NEXT:   SI_SPILL_AV64_SAVE killed $vgpr14_vgpr15, %stack.1, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.1, align 4, addrspace 5)
+  ; GCN-NEXT:   SI_SPILL_AV64_SAVE killed $vgpr10_vgpr11, %stack.2, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.2, align 4, addrspace 5)
+  ; GCN-NEXT:   dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr16_sgpr17, 0, csr_amdgpu, implicit-def dead $vgpr0
+  ; GCN-NEXT:   renamable $vgpr14_vgpr15 = SI_SPILL_AV64_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.1, align 4, addrspace 5)
+  ; GCN-NEXT:   renamable $vgpr0_vgpr1 = nofpexcept V_FMA_F64_e64 0, killed $vgpr45_vgpr46, 0, killed $vgpr41_vgpr42, 0, killed $vgpr60_vgpr61, 0, 0, implicit $mode, implicit $exec
+  ; GCN-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+  ; GCN-NEXT:   FLAT_STORE_DWORDX2 killed renamable $vgpr58_vgpr59, killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64))
+  ; GCN-NEXT:   renamable $vgpr0_vgpr1 = SI_SPILL_AV64_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.2, align 4, addrspace 5)
+  ; GCN-NEXT:   FLAT_STORE_DWORDX2 killed renamable $vgpr0_vgpr1, killed renamable $vgpr56_vgpr57, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64))
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.2:
+  ; GCN-NEXT:   liveins: $vgpr40, $vgpr14_vgpr15:0x000000000000000F, $vgpr43_vgpr44:0x000000000000000F
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   renamable $vgpr0_vgpr1 = V_MOV_B64_PSEUDO 0, implicit $exec
+  ; GCN-NEXT:   FLAT_STORE_DWORDX2 undef renamable $vgpr0_vgpr1, killed renamable $vgpr43_vgpr44, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64))
+  ; GCN-NEXT:   FLAT_STORE_DWORDX2 killed renamable $vgpr0_vgpr1, killed renamable $vgpr14_vgpr15, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64))
+  ; GCN-NEXT:   S_SETPC_B64_return undef $sgpr30_sgpr31
+  bb.0:
+    successors: %bb.2(0x40000000), %bb.1(0x40000000)
+    liveins: $sgpr30, $sgpr31, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr40, $sgpr30_sgpr31
+
+    renamable $vgpr44 = COPY $vgpr13, implicit $exec
+    renamable $vgpr43 = COPY $vgpr12, implicit $exec
+    renamable $vgpr57 = COPY $vgpr9, implicit $exec
+    renamable $vgpr56 = COPY $vgpr8, implicit $exec
+    renamable $vgpr59 = COPY $vgpr7, implicit $exec
+    renamable $vgpr58 = COPY $vgpr6, implicit $exec
+    renamable $vgpr61 = COPY $vgpr5, implicit $exec
+    renamable $vgpr60 = COPY $vgpr4, implicit $exec
+    renamable $vgpr42 = COPY $vgpr3, implicit $exec
+    renamable $vgpr41 = COPY $vgpr2, implicit $exec
+    renamable $vgpr46 = COPY $vgpr1, implicit $exec
+    renamable $vgpr45 = COPY $vgpr0, implicit $exec
+    S_CBRANCH_SCC1 %bb.2, implicit undef $scc
+    S_BRANCH %bb.1
+
+  bb.1:
+    successors: %bb.2(0x80000000)
+    liveins: $sgpr30, $sgpr31, $vgpr40, $sgpr30_sgpr31, $vgpr10_vgpr11:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000F, $vgpr41_vgpr42:0x000000000000000F, $vgpr43_vgpr44:0x000000000000000F, $vgpr45_vgpr46:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F
+
+    ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+    renamable $sgpr16_sgpr17 = IMPLICIT_DEF
+    $vgpr40 = SI_SPILL_S32_TO_VGPR $sgpr30, 0, $vgpr40, implicit-def $sgpr30_sgpr31, implicit $sgpr30_sgpr31
+    $vgpr40 = SI_SPILL_S32_TO_VGPR $sgpr31, 1, $vgpr40, implicit $sgpr30_sgpr31
+    SI_SPILL_AV64_SAVE killed $vgpr14_vgpr15, %stack.1, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.1, align 4, addrspace 5)
+    SI_SPILL_AV64_SAVE killed $vgpr10_vgpr11, %stack.2, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.2, align 4, addrspace 5)
+    dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr16_sgpr17, 0, csr_amdgpu, implicit-def dead $vgpr0
+    renamable $vgpr14_vgpr15 = SI_SPILL_AV64_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.1, align 4, addrspace 5)
+    renamable $vgpr0_vgpr1 = nofpexcept V_FMA_F64_e64 0, killed $vgpr45_vgpr46, 0, killed $vgpr41_vgpr42, 0, killed $vgpr60_vgpr61, 0, 0, implicit $mode, implicit $exec
+    ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+    FLAT_STORE_DWORDX2 killed renamable $vgpr58_vgpr59, killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64))
+    renamable $vgpr0_vgpr1 = SI_SPILL_AV64_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.2, align 4, addrspace 5)
+    FLAT_STORE_DWORDX2 killed renamable $vgpr0_vgpr1, killed renamable $vgpr56_vgpr57, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64))
+
+  bb.2:
+    liveins: $vgpr40, $vgpr14_vgpr15:0x000000000000000F, $vgpr43_vgpr44:0x000000000000000F
+
+    renamable $vgpr0_vgpr1 = V_MOV_B64_PSEUDO 0, implicit $exec
+    FLAT_STORE_DWORDX2 undef renamable $vgpr0_vgpr1, killed renamable $vgpr43_vgpr44, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64))
+    FLAT_STORE_DWORDX2 killed renamable $vgpr0_vgpr1, killed renamable $vgpr14_vgpr15, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64))
+    S_SETPC_B64_return undef $sgpr30_sgpr31
+...


        


More information about the llvm-commits mailing list