[llvm] 1ceccbb - VirtRegRewriter: Add implicit register defs for live out undef lanes (#112679)

via llvm-commits llvm-commits at lists.llvm.org
Mon Oct 28 17:33:56 PDT 2024


Author: Matt Arsenault
Date: 2024-10-28T17:33:53-07:00
New Revision: 1ceccbb0dd9d8539fec2213566fe6cc2a05b7993

URL: https://github.com/llvm/llvm-project/commit/1ceccbb0dd9d8539fec2213566fe6cc2a05b7993
DIFF: https://github.com/llvm/llvm-project/commit/1ceccbb0dd9d8539fec2213566fe6cc2a05b7993.diff

LOG: VirtRegRewriter: Add implicit register defs for live out undef lanes (#112679)

If an undef subregister def is live into another block, we need to
maintain a physreg def to track the liveness of those lanes. This
would manifest a verifier error after branch folding, when the cloned
tail block use no longer had a def.

We need to detect interference with other assigned intervals to avoid
clobbering the undef lanes defined in other intervals, since the undef
def didn't count as interference. This is pretty ugly and adds a new
dependency on LiveRegMatrix, keeping it live for one more pass. It also
adds a lot of implicit operand spam (we really should have a better
representation for this).

There is a missing verifier check for this situation. Added an xfailed
test that demonstrates this. We may also be able to revert the changes
in 47d3cbcf842a036c20c3f1c74255cdfc213f41c2.

It might be better to insert an IMPLICIT_DEF before the instruction
rather than using the implicit-def operand.

Fixes #98474

Added: 
    llvm/test/CodeGen/AMDGPU/issue98474-assigned-physreg-interference.mir
    llvm/test/CodeGen/AMDGPU/issue98474-need-live-out-undef-subregister-def.ll
    llvm/test/CodeGen/AMDGPU/issue98474-virtregrewriter-live-out-undef-subregisters.mir
    llvm/test/MachineVerifier/AMDGPU/issue98474-missing-def-liveout-physical-subregister.mir

Modified: 
    llvm/include/llvm/CodeGen/LiveRegMatrix.h
    llvm/lib/CodeGen/LiveRegMatrix.cpp
    llvm/lib/CodeGen/VirtRegMap.cpp
    llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
    llvm/test/CodeGen/AMDGPU/infloop-subrange-spill-inspect-subrange.mir
    llvm/test/CodeGen/AMDGPU/infloop-subrange-spill.mir

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/CodeGen/LiveRegMatrix.h b/llvm/include/llvm/CodeGen/LiveRegMatrix.h
index 84050bf1707377..486392ca3c49d5 100644
--- a/llvm/include/llvm/CodeGen/LiveRegMatrix.h
+++ b/llvm/include/llvm/CodeGen/LiveRegMatrix.h
@@ -118,6 +118,16 @@ class LiveRegMatrix {
   /// the segment [Start, End).
   bool checkInterference(SlotIndex Start, SlotIndex End, MCRegister PhysReg);
 
+  /// Check for interference in the segment [Start, End) that may prevent
+  /// assignment to PhysReg, like checkInterference. Returns a lane mask of
+  /// which lanes of the physical register interfere in the segment [Start, End)
+  /// of some other interval already assigned to PhysReg.
+  ///
+  /// If this function returns LaneBitmask::getNone(), PhysReg is completely
+  /// free at the segment [Start, End).
+  LaneBitmask checkInterferenceLanes(SlotIndex Start, SlotIndex End,
+                                     MCRegister PhysReg);
+
   /// Assign VirtReg to PhysReg.
   /// This will mark VirtReg's live range as occupied in the LiveRegMatrix and
   /// update VirtRegMap. The live range is expected to be available in PhysReg.

diff  --git a/llvm/lib/CodeGen/LiveRegMatrix.cpp b/llvm/lib/CodeGen/LiveRegMatrix.cpp
index a57233cf37da01..bc8c59381a40e1 100644
--- a/llvm/lib/CodeGen/LiveRegMatrix.cpp
+++ b/llvm/lib/CodeGen/LiveRegMatrix.cpp
@@ -244,6 +244,41 @@ bool LiveRegMatrix::checkInterference(SlotIndex Start, SlotIndex End,
   return false;
 }
 
+LaneBitmask LiveRegMatrix::checkInterferenceLanes(SlotIndex Start,
+                                                  SlotIndex End,
+                                                  MCRegister PhysReg) {
+  // Construct artificial live range containing only one segment [Start, End).
+  VNInfo valno(0, Start);
+  LiveRange::Segment Seg(Start, End, &valno);
+  LiveRange LR;
+  LR.addSegment(Seg);
+
+  LaneBitmask InterferingLanes;
+
+  // Check for interference with that segment
+  for (MCRegUnitMaskIterator MCRU(PhysReg, TRI); MCRU.isValid(); ++MCRU) {
+    auto [Unit, Lanes] = *MCRU;
+    // LR is stack-allocated. LiveRegMatrix caches queries by a key that
+    // includes the address of the live range. If (for the same reg unit) this
+    // checkInterference overload is called twice, without any other query()
+    // calls in between (on heap-allocated LiveRanges)  - which would invalidate
+    // the cached query - the LR address seen the second time may well be the
+    // same as that seen the first time, while the Start/End/valno may not - yet
+    // the same cached result would be fetched. To avoid that, we don't cache
+    // this query.
+    //
+    // FIXME: the usability of the Query API needs to be improved to avoid
+    // subtle bugs due to query identity. Avoiding caching, for example, would
+    // greatly simplify things.
+    LiveIntervalUnion::Query Q;
+    Q.reset(UserTag, LR, Matrix[Unit]);
+    if (Q.checkInterference())
+      InterferingLanes |= Lanes;
+  }
+
+  return InterferingLanes;
+}
+
 Register LiveRegMatrix::getOneVReg(unsigned PhysReg) const {
   const LiveInterval *VRegInterval = nullptr;
   for (MCRegUnit Unit : TRI->regunits(PhysReg)) {

diff  --git a/llvm/lib/CodeGen/VirtRegMap.cpp b/llvm/lib/CodeGen/VirtRegMap.cpp
index 46253b1743d97e..26a12512c87be0 100644
--- a/llvm/lib/CodeGen/VirtRegMap.cpp
+++ b/llvm/lib/CodeGen/VirtRegMap.cpp
@@ -21,6 +21,7 @@
 #include "llvm/CodeGen/LiveDebugVariables.h"
 #include "llvm/CodeGen/LiveInterval.h"
 #include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/LiveRegMatrix.h"
 #include "llvm/CodeGen/LiveStacks.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
@@ -203,6 +204,7 @@ class VirtRegRewriter : public MachineFunctionPass {
   MachineRegisterInfo *MRI = nullptr;
   SlotIndexes *Indexes = nullptr;
   LiveIntervals *LIS = nullptr;
+  LiveRegMatrix *LRM = nullptr;
   VirtRegMap *VRM = nullptr;
   LiveDebugVariables *DebugVars = nullptr;
   DenseSet<Register> RewriteRegs;
@@ -215,6 +217,9 @@ class VirtRegRewriter : public MachineFunctionPass {
   void handleIdentityCopy(MachineInstr &MI);
   void expandCopyBundle(MachineInstr &MI) const;
   bool subRegLiveThrough(const MachineInstr &MI, MCRegister SuperPhysReg) const;
+  LaneBitmask liveOutUndefPhiLanesForUndefSubregDef(
+      const LiveInterval &LI, const MachineBasicBlock &MBB, unsigned SubReg,
+      MCPhysReg PhysReg, const MachineInstr &MI) const;
 
 public:
   static char ID;
@@ -247,6 +252,7 @@ INITIALIZE_PASS_BEGIN(VirtRegRewriter, "virtregrewriter",
 INITIALIZE_PASS_DEPENDENCY(SlotIndexesWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables)
+INITIALIZE_PASS_DEPENDENCY(LiveRegMatrixWrapperLegacy)
 INITIALIZE_PASS_DEPENDENCY(LiveStacks)
 INITIALIZE_PASS_DEPENDENCY(VirtRegMapWrapperLegacy)
 INITIALIZE_PASS_END(VirtRegRewriter, "virtregrewriter",
@@ -262,6 +268,7 @@ void VirtRegRewriter::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addRequired<LiveStacks>();
   AU.addPreserved<LiveStacks>();
   AU.addRequired<VirtRegMapWrapperLegacy>();
+  AU.addRequired<LiveRegMatrixWrapperLegacy>();
 
   if (!ClearVirtRegs)
     AU.addPreserved<LiveDebugVariables>();
@@ -276,6 +283,7 @@ bool VirtRegRewriter::runOnMachineFunction(MachineFunction &fn) {
   MRI = &MF->getRegInfo();
   Indexes = &getAnalysis<SlotIndexesWrapperPass>().getSI();
   LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
+  LRM = &getAnalysis<LiveRegMatrixWrapperLegacy>().getLRM();
   VRM = &getAnalysis<VirtRegMapWrapperLegacy>().getVRM();
   DebugVars = &getAnalysis<LiveDebugVariables>();
   LLVM_DEBUG(dbgs() << "********** REWRITE VIRTUAL REGISTERS **********\n"
@@ -548,6 +556,40 @@ bool VirtRegRewriter::subRegLiveThrough(const MachineInstr &MI,
   return false;
 }
 
+/// Compute a lanemask for undef lanes which need to be preserved out of the
+/// defining block for a register assignment for a subregister def. \p PhysReg
+/// is assigned to \p LI, which is the main range.
+LaneBitmask VirtRegRewriter::liveOutUndefPhiLanesForUndefSubregDef(
+    const LiveInterval &LI, const MachineBasicBlock &MBB, unsigned SubReg,
+    MCPhysReg PhysReg, const MachineInstr &MI) const {
+  LaneBitmask UndefMask = ~TRI->getSubRegIndexLaneMask(SubReg);
+  LaneBitmask LiveOutUndefLanes;
+
+  for (const LiveInterval::SubRange &SR : LI.subranges()) {
+    // Figure out which lanes are undef live into a successor.
+    LaneBitmask NeedImpDefLanes = UndefMask & SR.LaneMask;
+    if (NeedImpDefLanes.any() && !LIS->isLiveOutOfMBB(SR, &MBB)) {
+      for (const MachineBasicBlock *Succ : MBB.successors()) {
+        if (LIS->isLiveInToMBB(SR, Succ))
+          LiveOutUndefLanes |= NeedImpDefLanes;
+      }
+    }
+  }
+
+  SlotIndex MIIndex = LIS->getInstructionIndex(MI);
+  SlotIndex BeforeMIUses = MIIndex.getBaseIndex();
+  LaneBitmask InterferingLanes =
+      LRM->checkInterferenceLanes(BeforeMIUses, MIIndex.getRegSlot(), PhysReg);
+  LiveOutUndefLanes &= ~InterferingLanes;
+
+  LLVM_DEBUG(if (LiveOutUndefLanes.any()) {
+    dbgs() << "Need live out undef defs for " << printReg(PhysReg)
+           << LiveOutUndefLanes << " from " << printMBBReference(MBB) << '\n';
+  });
+
+  return LiveOutUndefLanes;
+}
+
 void VirtRegRewriter::rewrite() {
   bool NoSubRegLiveness = !MRI->subRegLivenessEnabled();
   SmallVector<Register, 8> SuperDeads;
@@ -602,6 +644,32 @@ void VirtRegRewriter::rewrite() {
                 MO.setIsUndef(true);
             } else if (!MO.isDead()) {
               assert(MO.isDef());
+              if (MO.isUndef()) {
+                const LiveInterval &LI = LIS->getInterval(VirtReg);
+
+                LaneBitmask LiveOutUndefLanes =
+                    liveOutUndefPhiLanesForUndefSubregDef(LI, *MBBI, SubReg,
+                                                          PhysReg, MI);
+                if (LiveOutUndefLanes.any()) {
+                  SmallVector<unsigned, 16> CoveringIndexes;
+
+                  // TODO: Just use one super register def if none of the lanes
+                  // are needed?
+                  if (!TRI->getCoveringSubRegIndexes(
+                          *MRI, MRI->getRegClass(VirtReg), LiveOutUndefLanes,
+                          CoveringIndexes))
+                    llvm_unreachable(
+                        "cannot represent required subregister defs");
+
+                  // Try to represent the minimum needed live out def as a
+                  // sequence of subregister defs.
+                  //
+                  // FIXME: It would be better if we could directly represent
+                  // liveness with a lanemask instead of spamming operands.
+                  for (unsigned SubIdx : CoveringIndexes)
+                    SuperDefs.push_back(TRI->getSubReg(PhysReg, SubIdx));
+                }
+              }
             }
           }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
index 86254329923971..055e9850de3d68 100644
--- a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
@@ -38,24 +38,19 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT:   renamable $sgpr30_sgpr31 = S_MOV_B64 0
   ; GFX90A-NEXT:   renamable $vcc = S_AND_B64 $exec, renamable $sgpr26_sgpr27, implicit-def dead $scc
-  ; GFX90A-NEXT:   $vgpr22 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   $vgpr10 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   $vgpr24 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   $vgpr18 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   $vgpr20 = IMPLICIT_DEF
   ; GFX90A-NEXT:   S_CBRANCH_VCCNZ %bb.59, implicit $vcc
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.2:
   ; GFX90A-NEXT:   successors: %bb.3(0x80000000)
-  ; GFX90A-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr22, $sgpr33, $vgpr31, $sgpr4_sgpr5, $sgpr6, $sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr30_sgpr31, $sgpr42_sgpr43, $sgpr54, $sgpr55, $sgpr16_sgpr17_sgpr18, $sgpr18_sgpr19, $sgpr20_sgpr21_sgpr22, $vgpr2, $vgpr3, $vgpr10, $vgpr24, $vgpr18, $vgpr20
+  ; GFX90A-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr33, $vgpr31, $sgpr4_sgpr5, $sgpr6, $sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr30_sgpr31, $sgpr42_sgpr43, $sgpr54, $sgpr55, $sgpr16_sgpr17_sgpr18, $sgpr18_sgpr19, $sgpr20_sgpr21_sgpr22, $vgpr2, $vgpr3
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT:   renamable $sgpr15 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $sgpr23 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr19 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr21 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr23 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr25 = IMPLICIT_DEF
+  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
+  ; GFX90A-NEXT:   renamable $vgpr19 = IMPLICIT_DEF implicit-def $vgpr18
+  ; GFX90A-NEXT:   renamable $vgpr21 = IMPLICIT_DEF implicit-def $vgpr20
+  ; GFX90A-NEXT:   renamable $vgpr23 = IMPLICIT_DEF implicit-def $vgpr22
+  ; GFX90A-NEXT:   renamable $vgpr25 = IMPLICIT_DEF implicit-def $vgpr24
   ; GFX90A-NEXT:   renamable $sgpr28_sgpr29 = S_MOV_B64 0
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.3.Flow17:
@@ -111,8 +106,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT:   renamable $vgpr52 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr16 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr53 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF
+  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
+  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
   ; GFX90A-NEXT:   renamable $sgpr15 = IMPLICIT_DEF
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.6.Flow20:
@@ -395,8 +390,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT:   renamable $vgpr52 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr16 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr53 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF
+  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
+  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
   ; GFX90A-NEXT:   renamable $sgpr15 = IMPLICIT_DEF
   ; GFX90A-NEXT:   $sgpr30_sgpr31 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
   ; GFX90A-NEXT:   S_CBRANCH_EXECNZ %bb.37, implicit $exec
@@ -434,8 +429,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT:   renamable $vgpr52 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr16 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr53 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF
+  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
+  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
   ; GFX90A-NEXT:   renamable $sgpr15 = IMPLICIT_DEF
   ; GFX90A-NEXT:   $sgpr36_sgpr37 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
   ; GFX90A-NEXT:   S_CBRANCH_EXECNZ %bb.39, implicit $exec
@@ -484,8 +479,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT:   renamable $vgpr52 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr16 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr53 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF
+  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
+  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
   ; GFX90A-NEXT:   renamable $sgpr15 = IMPLICIT_DEF
   ; GFX90A-NEXT:   $sgpr38_sgpr39 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
   ; GFX90A-NEXT:   S_CBRANCH_EXECNZ %bb.41, implicit $exec
@@ -535,8 +530,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT:   renamable $vgpr52 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr16 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr53 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF
+  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
+  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
   ; GFX90A-NEXT:   renamable $sgpr15 = IMPLICIT_DEF
   ; GFX90A-NEXT:   $sgpr40_sgpr41 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
   ; GFX90A-NEXT:   S_CBRANCH_EXECNZ %bb.47, implicit $exec
@@ -589,8 +584,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT:   renamable $vgpr52 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr16 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr53 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF
+  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
+  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
   ; GFX90A-NEXT:   renamable $sgpr15 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $sgpr44_sgpr45 = S_MOV_B64 0
   ; GFX90A-NEXT: {{  $}}
@@ -643,8 +638,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT:   renamable $vgpr52 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr16 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr53 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF
+  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
+  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
   ; GFX90A-NEXT:   renamable $sgpr15 = IMPLICIT_DEF
   ; GFX90A-NEXT:   $sgpr16_sgpr17 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
   ; GFX90A-NEXT:   S_CBRANCH_EXECNZ %bb.43, implicit $exec
@@ -689,8 +684,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT:   renamable $vgpr52 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr16 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr53 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF
+  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
+  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
   ; GFX90A-NEXT:   renamable $sgpr15 = IMPLICIT_DEF
   ; GFX90A-NEXT:   S_BRANCH %bb.45
   ; GFX90A-NEXT: {{  $}}
@@ -719,8 +714,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT:   renamable $vgpr52 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr16 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr53 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF
+  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
+  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
   ; GFX90A-NEXT:   renamable $sgpr15 = IMPLICIT_DEF
   ; GFX90A-NEXT:   S_BRANCH %bb.46
   ; GFX90A-NEXT: {{  $}}
@@ -748,8 +743,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT:   renamable $vgpr52 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr16 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr53 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF
+  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
+  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
   ; GFX90A-NEXT:   renamable $sgpr15 = IMPLICIT_DEF
   ; GFX90A-NEXT:   S_BRANCH %bb.62
   ; GFX90A-NEXT: {{  $}}
@@ -773,8 +768,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT:   renamable $vgpr52 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr16 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr53 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF
+  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
+  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
   ; GFX90A-NEXT:   renamable $sgpr15 = IMPLICIT_DEF
   ; GFX90A-NEXT:   $sgpr58_sgpr59 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
   ; GFX90A-NEXT:   S_CBRANCH_EXECNZ %bb.53, implicit $exec
@@ -880,8 +875,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT:   renamable $vgpr52 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr16 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr53 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF
+  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
+  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
   ; GFX90A-NEXT:   $sgpr50_sgpr51 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
   ; GFX90A-NEXT:   S_CBRANCH_EXECNZ %bb.57, implicit $exec
   ; GFX90A-NEXT: {{  $}}

diff  --git a/llvm/test/CodeGen/AMDGPU/infloop-subrange-spill-inspect-subrange.mir b/llvm/test/CodeGen/AMDGPU/infloop-subrange-spill-inspect-subrange.mir
index 6603f2ef7adef7..7421a2e10c3b57 100644
--- a/llvm/test/CodeGen/AMDGPU/infloop-subrange-spill-inspect-subrange.mir
+++ b/llvm/test/CodeGen/AMDGPU/infloop-subrange-spill-inspect-subrange.mir
@@ -32,7 +32,7 @@ body:             |
   ; CHECK-NEXT:   dead undef [[DEF2:%[0-9]+]].sub0:vreg_64 = IMPLICIT_DEF
   ; CHECK-NEXT:   renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = S_LOAD_DWORDX16_IMM renamable $sgpr4_sgpr5, 0, 0 :: (invariant load (s512), align 32, addrspace 4)
   ; CHECK-NEXT:   SI_SPILL_S512_SAVE killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s512) into %stack.0, align 4, addrspace 5)
-  ; CHECK-NEXT:   renamable $sgpr24 = IMPLICIT_DEF
+  ; CHECK-NEXT:   renamable $sgpr24 = IMPLICIT_DEF implicit-def $sgpr25
   ; CHECK-NEXT:   renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = S_LOAD_DWORDX16_IMM undef renamable $sgpr4_sgpr5, 0, 0 :: (invariant load (s512), align 32, addrspace 4)
   ; CHECK-NEXT:   $exec = S_MOV_B64_term undef renamable $sgpr4_sgpr5
   ; CHECK-NEXT:   S_CBRANCH_EXECZ %bb.6, implicit $exec
@@ -82,7 +82,7 @@ body:             |
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 = SI_SPILL_S512_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s512) from %stack.0, align 4, addrspace 5)
   ; CHECK-NEXT:   dead [[IMAGE_SAMPLE_LZ_V1_V2_5:%[0-9]+]]:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2 undef [[DEF]], killed renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51, undef renamable $sgpr24_sgpr25_sgpr26_sgpr27, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 8)
-  ; CHECK-NEXT:   renamable $sgpr25 = COPY undef renamable $sgpr24
+  ; CHECK-NEXT:   renamable $sgpr25 = COPY undef renamable $sgpr24, implicit-def $sgpr24
   ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.7, implicit undef $vcc
   ; CHECK-NEXT:   S_BRANCH %bb.6
   ; CHECK-NEXT: {{  $}}

diff  --git a/llvm/test/CodeGen/AMDGPU/infloop-subrange-spill.mir b/llvm/test/CodeGen/AMDGPU/infloop-subrange-spill.mir
index fa95f4c1341742..8ae6c279558961 100644
--- a/llvm/test/CodeGen/AMDGPU/infloop-subrange-spill.mir
+++ b/llvm/test/CodeGen/AMDGPU/infloop-subrange-spill.mir
@@ -30,7 +30,7 @@ body:             |
   ; CHECK-NEXT:   dead renamable $sgpr5 = IMPLICIT_DEF
   ; CHECK-NEXT:   dead undef [[DEF3:%[0-9]+]].sub1:vreg_64 = IMPLICIT_DEF
   ; CHECK-NEXT:   dead renamable $sgpr5 = IMPLICIT_DEF
-  ; CHECK-NEXT:   renamable $sgpr24 = IMPLICIT_DEF
+  ; CHECK-NEXT:   renamable $sgpr24 = IMPLICIT_DEF implicit-def $sgpr25
   ; CHECK-NEXT:   renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = S_LOAD_DWORDX16_IMM undef renamable $sgpr4_sgpr5, 0, 0 :: (invariant load (s512), align 32, addrspace 4)
   ; CHECK-NEXT:   $exec = S_MOV_B64_term undef renamable $sgpr4_sgpr5
   ; CHECK-NEXT:   S_CBRANCH_EXECZ %bb.5, implicit $exec
@@ -78,7 +78,7 @@ body:             |
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = S_LOAD_DWORDX8_IMM undef renamable $sgpr4_sgpr5, 32, 0 :: (invariant load (s256), addrspace 4)
   ; CHECK-NEXT:   dead [[IMAGE_SAMPLE_LZ_V1_V2_5:%[0-9]+]]:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2 undef [[DEF]], killed renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19, undef renamable $sgpr24_sgpr25_sgpr26_sgpr27, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 8)
-  ; CHECK-NEXT:   renamable $sgpr25 = COPY undef renamable $sgpr24
+  ; CHECK-NEXT:   renamable $sgpr25 = COPY undef renamable $sgpr24, implicit-def $sgpr24
   ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.6, implicit undef $vcc
   ; CHECK-NEXT:   S_BRANCH %bb.5
   ; CHECK-NEXT: {{  $}}

diff  --git a/llvm/test/CodeGen/AMDGPU/issue98474-assigned-physreg-interference.mir b/llvm/test/CodeGen/AMDGPU/issue98474-assigned-physreg-interference.mir
new file mode 100644
index 00000000000000..786ce402038369
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/issue98474-assigned-physreg-interference.mir
@@ -0,0 +1,55 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -start-before=greedy,2 -stop-after=tailduplication -verify-machineinstrs -o - %s | FileCheck %s
+
+---
+name:            undef_subreg_def_live_out_tailduplicate_vreg96_undef_sub1_sub2_assigned_physreg_interference
+tracksRegLiveness: true
+machineFunctionInfo:
+  isEntryFunction: true
+  scratchRSrcReg:  '$sgpr96_sgpr97_sgpr98_sgpr99'
+  stackPtrOffsetReg: '$sgpr32'
+body:             |
+  ; CHECK-LABEL: name: undef_subreg_def_live_out_tailduplicate_vreg96_undef_sub1_sub2_assigned_physreg_interference
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT:   liveins: $sgpr0, $vgpr2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_CMP_EQ_U32 $sgpr0, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_CBRANCH_SCC0 %bb.2, implicit killed $scc
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   liveins: $vgpr2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   renamable $vgpr3 = V_MOV_B32_e32 0, implicit $exec, implicit-def $vgpr4_vgpr5
+  ; CHECK-NEXT:   EXP 0, killed renamable $vgpr3, renamable $vgpr4, renamable $vgpr5, killed renamable $vgpr2, 0, 0, 0, implicit $exec
+  ; CHECK-NEXT:   S_ENDPGM 0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   liveins: $vgpr2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_NOP 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7
+  ; CHECK-NEXT:   renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+  ; CHECK-NEXT:   renamable $vgpr3_vgpr4_vgpr5 = BUFFER_LOAD_FORMAT_XYZ_IDXEN killed renamable $vgpr0, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 8)
+  ; CHECK-NEXT:   EXP 0, killed renamable $vgpr3, renamable $vgpr4, renamable $vgpr5, killed renamable $vgpr2, 0, 0, 0, implicit $exec
+  ; CHECK-NEXT:   S_ENDPGM 0
+  bb.0:
+    liveins: $sgpr0, $vgpr2
+
+    %2:vgpr_32 = COPY $vgpr2
+    S_CMP_EQ_U32 killed $sgpr0, 0, implicit-def $scc
+    S_CBRANCH_SCC0 %bb.2, implicit killed $scc
+
+  bb.1:
+    undef %0.sub0:vreg_96 = V_MOV_B32_e32 0, implicit $exec
+    S_BRANCH %bb.3
+
+  bb.2:
+    S_NOP 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7
+    %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    %0:vreg_96 = BUFFER_LOAD_FORMAT_XYZ_IDXEN killed %1, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), addrspace 8)
+
+  bb.3:
+    EXP 0, killed %0.sub0, killed %0.sub1, killed %0.sub2, %2:vgpr_32, 0, 0, 0, implicit $exec
+    S_ENDPGM 0
+
+...

diff  --git a/llvm/test/CodeGen/AMDGPU/issue98474-need-live-out-undef-subregister-def.ll b/llvm/test/CodeGen/AMDGPU/issue98474-need-live-out-undef-subregister-def.ll
new file mode 100644
index 00000000000000..7caa563d8b2983
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/issue98474-need-live-out-undef-subregister-def.ll
@@ -0,0 +1,42 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck %s
+
+; Check for verifier error after tail duplication. An implicit_def of
+; a subregsiter is needed to maintain liveness after assignment.
+
+define amdgpu_vs void @test(i32 inreg %cmp, i32 %e0) {
+; CHECK-LABEL: test:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    s_cmp_eq_u32 s0, 0
+; CHECK-NEXT:    s_mov_b32 s0, 0
+; CHECK-NEXT:    s_cbranch_scc1 .LBB0_2
+; CHECK-NEXT:  ; %bb.1: ; %load
+; CHECK-NEXT:    s_mov_b32 s1, s0
+; CHECK-NEXT:    s_mov_b32 s2, s0
+; CHECK-NEXT:    s_mov_b32 s3, s0
+; CHECK-NEXT:    v_mov_b32_e32 v1, 0
+; CHECK-NEXT:    buffer_load_format_xy v[1:2], v1, s[0:3], 0 idxen
+; CHECK-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-NEXT:    exp mrt0 v0, v1, v2, v0
+; CHECK-NEXT:    s_endpgm
+; CHECK-NEXT:  .LBB0_2:
+; CHECK-NEXT:    v_mov_b32_e32 v1, 0
+; CHECK-NEXT:    exp mrt0 v0, v1, v2, v0
+; CHECK-NEXT:    s_endpgm
+entry:
+  %cond = icmp eq i32 %cmp, 0
+  br i1 %cond, label %end, label %load
+
+load:
+  %data1 = call <2 x i32> @llvm.amdgcn.struct.buffer.load.format.v2i32(<4 x i32> zeroinitializer, i32 0, i32 0, i32 0, i32 0)
+  %e1 = extractelement <2 x i32> %data1, i32 0
+  %e2 = extractelement <2 x i32> %data1, i32 1
+  br label %end
+
+end:
+  %out1 = phi i32 [ 0, %entry ], [ %e1, %load ]
+  %out2 = phi i32 [ poison, %entry ], [ %e2, %load ]
+  call void @llvm.amdgcn.exp.i32(i32 0, i32 15, i32 %e0, i32 %out1, i32 %out2, i32 %e0, i1 false, i1 false)
+  ret void
+}
+

diff  --git a/llvm/test/CodeGen/AMDGPU/issue98474-virtregrewriter-live-out-undef-subregisters.mir b/llvm/test/CodeGen/AMDGPU/issue98474-virtregrewriter-live-out-undef-subregisters.mir
new file mode 100644
index 00000000000000..86b6c5982b4cbd
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/issue98474-virtregrewriter-live-out-undef-subregisters.mir
@@ -0,0 +1,363 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -start-before=greedy,2 -stop-after=tailduplication -verify-machineinstrs -o - %s | FileCheck %s
+
+# The partial def of %0 introduces a live out undef def of %0.sub1
+# into bb.3. We need to maintain this liveness with an explicit def of
+# the physical subregister. Without this, a verifier error would
+# appear after tail duplication.
+
+---
+name:            undef_subreg_def_live_out_tailduplicate_vreg64_undef_sub1
+tracksRegLiveness: true
+machineFunctionInfo:
+  isEntryFunction: true
+  scratchRSrcReg:  '$sgpr96_sgpr97_sgpr98_sgpr99'
+  stackPtrOffsetReg: '$sgpr32'
+body:             |
+  ; CHECK-LABEL: name: undef_subreg_def_live_out_tailduplicate_vreg64_undef_sub1
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT:   liveins: $sgpr0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_CMP_EQ_U32 $sgpr0, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_CBRANCH_SCC0 %bb.2, implicit killed $scc
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec, implicit-def $vgpr1
+  ; CHECK-NEXT:   EXP 0, killed renamable $vgpr0, renamable $vgpr1, undef renamable $vgpr0, undef renamable $vgpr0, 0, 0, 0, implicit $exec
+  ; CHECK-NEXT:   S_ENDPGM 0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   S_NOP 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7
+  ; CHECK-NEXT:   renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+  ; CHECK-NEXT:   renamable $vgpr0_vgpr1 = BUFFER_LOAD_FORMAT_XY_IDXEN killed renamable $vgpr0, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), addrspace 8)
+  ; CHECK-NEXT:   EXP 0, killed renamable $vgpr0, renamable $vgpr1, undef renamable $vgpr0, undef renamable $vgpr0, 0, 0, 0, implicit $exec
+  ; CHECK-NEXT:   S_ENDPGM 0
+  bb.0:
+    liveins: $sgpr0
+
+    S_CMP_EQ_U32 killed $sgpr0, 0, implicit-def $scc
+    S_CBRANCH_SCC0 %bb.2, implicit killed $scc
+
+  bb.1:
+    undef %0.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec
+    S_BRANCH %bb.3
+
+  bb.2:
+    S_NOP 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7
+    %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    %0:vreg_64 = BUFFER_LOAD_FORMAT_XY_IDXEN killed %1, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), addrspace 8)
+
+  bb.3:
+    EXP 0, killed %0.sub0, killed %0.sub1, undef %2:vgpr_32, undef %2:vgpr_32, 0, 0, 0, implicit $exec
+    S_ENDPGM 0
+
+...
+
+---
+name:            undef_subreg_def_live_out_tailduplicate_vreg96_undef_sub1_sub2
+tracksRegLiveness: true
+machineFunctionInfo:
+  isEntryFunction: true
+  scratchRSrcReg:  '$sgpr96_sgpr97_sgpr98_sgpr99'
+  stackPtrOffsetReg: '$sgpr32'
+body:             |
+  ; CHECK-LABEL: name: undef_subreg_def_live_out_tailduplicate_vreg96_undef_sub1_sub2
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT:   liveins: $sgpr0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_CMP_EQ_U32 $sgpr0, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_CBRANCH_SCC0 %bb.2, implicit killed $scc
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec, implicit-def $vgpr1_vgpr2
+  ; CHECK-NEXT:   EXP 0, killed renamable $vgpr0, renamable $vgpr1, renamable $vgpr2, undef renamable $vgpr0, 0, 0, 0, implicit $exec
+  ; CHECK-NEXT:   S_ENDPGM 0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   S_NOP 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7
+  ; CHECK-NEXT:   renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+  ; CHECK-NEXT:   renamable $vgpr0_vgpr1_vgpr2 = BUFFER_LOAD_FORMAT_XYZ_IDXEN killed renamable $vgpr0, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 8)
+  ; CHECK-NEXT:   EXP 0, killed renamable $vgpr0, renamable $vgpr1, renamable $vgpr2, undef renamable $vgpr0, 0, 0, 0, implicit $exec
+  ; CHECK-NEXT:   S_ENDPGM 0
+  bb.0:
+    liveins: $sgpr0
+
+    S_CMP_EQ_U32 killed $sgpr0, 0, implicit-def $scc
+    S_CBRANCH_SCC0 %bb.2, implicit killed $scc
+
+  bb.1:
+    undef %0.sub0:vreg_96 = V_MOV_B32_e32 0, implicit $exec
+    S_BRANCH %bb.3
+
+  bb.2:
+    S_NOP 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7
+    %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    %0:vreg_96 = BUFFER_LOAD_FORMAT_XYZ_IDXEN killed %1, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), addrspace 8)
+
+  bb.3:
+    EXP 0, killed %0.sub0, killed %0.sub1, killed %0.sub2, undef %2:vgpr_32, 0, 0, 0, implicit $exec
+    S_ENDPGM 0
+
+...
+
+---
+name:            undef_subreg_def_live_out_tailduplicate_vreg96_undef_sub0_sub2
+tracksRegLiveness: true
+machineFunctionInfo:
+  isEntryFunction: true
+  scratchRSrcReg:  '$sgpr96_sgpr97_sgpr98_sgpr99'
+  stackPtrOffsetReg: '$sgpr32'
+body:             |
+  ; CHECK-LABEL: name: undef_subreg_def_live_out_tailduplicate_vreg96_undef_sub0_sub2
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT:   liveins: $sgpr0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_CMP_EQ_U32 $sgpr0, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_CBRANCH_SCC0 %bb.2, implicit killed $scc
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   renamable $vgpr1 = V_MOV_B32_e32 0, implicit $exec, implicit-def $vgpr2, implicit-def $vgpr0
+  ; CHECK-NEXT:   EXP 0, killed renamable $vgpr0, renamable $vgpr1, renamable $vgpr2, undef renamable $vgpr0, 0, 0, 0, implicit $exec
+  ; CHECK-NEXT:   S_ENDPGM 0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   S_NOP 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7
+  ; CHECK-NEXT:   renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+  ; CHECK-NEXT:   renamable $vgpr0_vgpr1_vgpr2 = BUFFER_LOAD_FORMAT_XYZ_IDXEN killed renamable $vgpr0, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 8)
+  ; CHECK-NEXT:   EXP 0, killed renamable $vgpr0, renamable $vgpr1, renamable $vgpr2, undef renamable $vgpr0, 0, 0, 0, implicit $exec
+  ; CHECK-NEXT:   S_ENDPGM 0
+  bb.0:
+    liveins: $sgpr0
+
+    S_CMP_EQ_U32 killed $sgpr0, 0, implicit-def $scc
+    S_CBRANCH_SCC0 %bb.2, implicit killed $scc
+
+  bb.1:
+    undef %0.sub1:vreg_96 = V_MOV_B32_e32 0, implicit $exec
+    S_BRANCH %bb.3
+
+  bb.2:
+    S_NOP 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7
+    %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    %0:vreg_96 = BUFFER_LOAD_FORMAT_XYZ_IDXEN killed %1, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), addrspace 8)
+
+  bb.3:
+    EXP 0, killed %0.sub0, killed %0.sub1, killed %0.sub2, undef %2:vgpr_32, 0, 0, 0, implicit $exec
+    S_ENDPGM 0
+
+...
+
+# Test another use of the value before the block end.
+---
+name:            undef_subreg_def_live_out_tailduplicate_vreg64_undef_sub1_undef_use_in_def_block
+tracksRegLiveness: true
+machineFunctionInfo:
+  isEntryFunction: true
+  scratchRSrcReg:  '$sgpr96_sgpr97_sgpr98_sgpr99'
+  stackPtrOffsetReg: '$sgpr32'
+body:             |
+  ; CHECK-LABEL: name: undef_subreg_def_live_out_tailduplicate_vreg64_undef_sub1_undef_use_in_def_block
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT:   liveins: $sgpr0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_CMP_EQ_U32 $sgpr0, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_CBRANCH_SCC0 %bb.2, implicit killed $scc
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec, implicit-def $vgpr1
+  ; CHECK-NEXT:   S_NOP 0, implicit renamable $vgpr0_vgpr1
+  ; CHECK-NEXT:   EXP 0, killed renamable $vgpr0, renamable $vgpr1, undef renamable $vgpr0, undef renamable $vgpr0, 0, 0, 0, implicit $exec
+  ; CHECK-NEXT:   S_ENDPGM 0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   S_NOP 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7
+  ; CHECK-NEXT:   renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+  ; CHECK-NEXT:   renamable $vgpr0_vgpr1 = BUFFER_LOAD_FORMAT_XY_IDXEN killed renamable $vgpr0, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), addrspace 8)
+  ; CHECK-NEXT:   EXP 0, killed renamable $vgpr0, renamable $vgpr1, undef renamable $vgpr0, undef renamable $vgpr0, 0, 0, 0, implicit $exec
+  ; CHECK-NEXT:   S_ENDPGM 0
+  bb.0:
+    liveins: $sgpr0
+
+    S_CMP_EQ_U32 killed $sgpr0, 0, implicit-def $scc
+    S_CBRANCH_SCC0 %bb.2, implicit killed $scc
+
+  bb.1:
+    undef %0.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec
+    S_NOP 0, implicit %0
+    S_BRANCH %bb.3
+
+  bb.2:
+    S_NOP 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7
+    %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    %0:vreg_64 = BUFFER_LOAD_FORMAT_XY_IDXEN killed %1, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), addrspace 8)
+
+  bb.3:
+    EXP 0, killed %0.sub0, killed %0.sub1, undef %2:vgpr_32, undef %2:vgpr_32, 0, 0, 0, implicit $exec
+    S_ENDPGM 0
+
+...
+
+# The undef subregister is not live out, no implicit def should be added for it
+---
+name:            undef_subreg_def_live_out_tailduplicate_vreg64_undef_sub1_no_phi_use
+tracksRegLiveness: true
+machineFunctionInfo:
+  isEntryFunction: true
+  scratchRSrcReg:  '$sgpr96_sgpr97_sgpr98_sgpr99'
+  stackPtrOffsetReg: '$sgpr32'
+body:             |
+  ; CHECK-LABEL: name: undef_subreg_def_live_out_tailduplicate_vreg64_undef_sub1_no_phi_use
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT:   liveins: $sgpr0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_CMP_EQ_U32 $sgpr0, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_CBRANCH_SCC0 %bb.2, implicit killed $scc
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+  ; CHECK-NEXT:   EXP 0, killed renamable $vgpr0, renamable $vgpr0, undef renamable $vgpr0, undef renamable $vgpr0, 0, 0, 0, implicit $exec
+  ; CHECK-NEXT:   S_ENDPGM 0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   S_NOP 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7
+  ; CHECK-NEXT:   renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+  ; CHECK-NEXT:   renamable $vgpr0_vgpr1 = BUFFER_LOAD_FORMAT_XY_IDXEN killed renamable $vgpr0, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), addrspace 8)
+  ; CHECK-NEXT:   EXP 0, killed renamable $vgpr0, renamable $vgpr0, undef renamable $vgpr0, undef renamable $vgpr0, 0, 0, 0, implicit $exec
+  ; CHECK-NEXT:   S_ENDPGM 0
+  bb.0:
+    liveins: $sgpr0
+
+    S_CMP_EQ_U32 killed $sgpr0, 0, implicit-def $scc
+    S_CBRANCH_SCC0 %bb.2, implicit killed $scc
+
+  bb.1:
+    undef %0.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec
+    S_BRANCH %bb.3
+
+  bb.2:
+    S_NOP 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7
+    %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    %0:vreg_64 = BUFFER_LOAD_FORMAT_XY_IDXEN killed %1, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), addrspace 8)
+
+  bb.3:
+    EXP 0, killed %0.sub0, killed %0.sub0, undef %2:vgpr_32, undef %2:vgpr_32, 0, 0, 0, implicit $exec
+    S_ENDPGM 0
+
+...
+
+# In bb.2, %0 should be assigned to vgpr0_vgpr1. Make sure the value
+# copied from $vgpr0 into %3 isn't clobbered by the undef phi def for
+# %0.sub1.
+---
+name:            assigned_physreg_subregister_interference
+tracksRegLiveness: true
+frameInfo:
+  adjustsStack: true
+machineFunctionInfo:
+  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
+  frameOffsetReg:  '$sgpr33'
+  stackPtrOffsetReg: '$sgpr32'
+  wwmReservedRegs:
+    - '$vgpr63'
+body:             |
+  ; CHECK-LABEL: name: assigned_physreg_subregister_interference
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $vgpr0, $vgpr1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+  ; CHECK-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5
+  ; CHECK-NEXT:   $vgpr40 = SI_SPILL_S32_TO_VGPR $sgpr30, 0, $vgpr40
+  ; CHECK-NEXT:   $vgpr40 = SI_SPILL_S32_TO_VGPR $sgpr31, 1, $vgpr40
+  ; CHECK-NEXT:   $vgpr40 = SI_SPILL_S32_TO_VGPR $sgpr34, 2, $vgpr40
+  ; CHECK-NEXT:   $vgpr40 = SI_SPILL_S32_TO_VGPR $sgpr35, 3, $vgpr40
+  ; CHECK-NEXT:   $vgpr40 = SI_SPILL_S32_TO_VGPR $sgpr36, 4, $vgpr40
+  ; CHECK-NEXT:   $vgpr40 = SI_SPILL_S32_TO_VGPR $sgpr37, 5, $vgpr40
+  ; CHECK-NEXT:   renamable $sgpr34_sgpr35 = S_MOV_B64 $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT:   liveins: $sgpr34_sgpr35, $vgpr0_vgpr1:0x000000000000000F
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   renamable $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   renamable $sgpr5 = V_READFIRSTLANE_B32 $vgpr1, implicit $exec
+  ; CHECK-NEXT:   renamable $vcc = V_CMP_EQ_U64_e64 $sgpr4_sgpr5, killed $vgpr0_vgpr1, implicit $exec
+  ; CHECK-NEXT:   renamable $sgpr36_sgpr37 = S_AND_SAVEEXEC_B64 killed renamable $vcc, implicit-def $exec, implicit-def dead $scc, implicit $exec
+  ; CHECK-NEXT:   dead $sgpr30_sgpr31 = noconvergent SI_CALL killed renamable $sgpr4_sgpr5, 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
+  ; CHECK-NEXT:   renamable $vgpr1 = COPY $vgpr0, implicit $exec
+  ; CHECK-NEXT:   renamable $vgpr0 = V_MOV_B32_e32 123, implicit $exec
+  ; CHECK-NEXT:   $exec = S_XOR_B64 $exec, renamable $sgpr36_sgpr37, implicit-def dead $scc
+  ; CHECK-NEXT:   S_CBRANCH_EXECNZ %bb.1, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   liveins: $vgpr1, $sgpr34_sgpr35
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $exec = COPY renamable $sgpr34_sgpr35
+  ; CHECK-NEXT:   renamable $vgpr0 = V_ADD_U32_e32 1, killed $vgpr1, implicit $exec
+  ; CHECK-NEXT:   $sgpr37 = SI_RESTORE_S32_FROM_VGPR $vgpr40, 5
+  ; CHECK-NEXT:   $sgpr36 = SI_RESTORE_S32_FROM_VGPR $vgpr40, 4
+  ; CHECK-NEXT:   $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr40, 3
+  ; CHECK-NEXT:   $sgpr34 = SI_RESTORE_S32_FROM_VGPR $vgpr40, 2
+  ; CHECK-NEXT:   $sgpr31 = SI_RESTORE_S32_FROM_VGPR $vgpr40, 1
+  ; CHECK-NEXT:   $sgpr30 = SI_RESTORE_S32_FROM_VGPR $vgpr40, 0
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+  ; CHECK-NEXT:   $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+  ; CHECK-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5
+  ; CHECK-NEXT:   SI_RETURN implicit $vgpr0
+  bb.0:
+    liveins: $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $vgpr0, $vgpr1, $vgpr63
+
+    $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr30, 0, $vgpr63
+    $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr31, 1, $vgpr63
+    $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr34, 2, $vgpr63
+    $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr35, 3, $vgpr63
+    $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr36, 4, $vgpr63
+    $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr37, 5, $vgpr63
+    undef %0.sub0:vreg_64 = COPY $vgpr0
+    %0.sub1:vreg_64 = COPY $vgpr1
+    ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+    renamable $sgpr34_sgpr35 = S_MOV_B64 $exec
+
+  bb.1:
+    liveins: $vgpr63, $sgpr34_sgpr35
+
+    renamable $sgpr4 = V_READFIRSTLANE_B32 %0.sub0, implicit $exec
+    renamable $sgpr5 = V_READFIRSTLANE_B32 %0.sub1, implicit $exec
+    renamable $vcc = V_CMP_EQ_U64_e64 $sgpr4_sgpr5, %0, implicit $exec
+    renamable $sgpr36_sgpr37 = S_AND_SAVEEXEC_B64 killed renamable $vcc, implicit-def $exec, implicit-def dead $scc, implicit $exec
+
+  bb.2:
+    liveins: $vgpr63, $sgpr4_sgpr5:0x000000000000000F, $sgpr34_sgpr35, $sgpr36_sgpr37
+
+    dead $sgpr30_sgpr31 = noconvergent SI_CALL killed renamable $sgpr4_sgpr5, 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
+    %3:vgpr_32 = COPY $vgpr0
+    undef %0.sub0:vreg_64 = V_MOV_B32_e32 123, implicit $exec
+    $exec = S_XOR_B64_term $exec, killed renamable $sgpr36_sgpr37, implicit-def dead $scc
+    S_CBRANCH_EXECNZ %bb.1, implicit $exec
+
+  bb.3:
+    liveins: $vgpr63, $sgpr34_sgpr35
+
+    $exec = S_MOV_B64_term killed renamable $sgpr34_sgpr35
+
+  bb.4:
+    liveins: $vgpr63
+
+    ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+    %6:vgpr_32 = V_ADD_U32_e32 1, %3, implicit $exec
+    $vgpr0 = COPY %6
+    $sgpr37 = SI_RESTORE_S32_FROM_VGPR $vgpr63, 5
+    $sgpr36 = SI_RESTORE_S32_FROM_VGPR $vgpr63, 4
+    $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr63, 3
+    $sgpr34 = SI_RESTORE_S32_FROM_VGPR $vgpr63, 2
+    $sgpr31 = SI_RESTORE_S32_FROM_VGPR $vgpr63, 1
+    $sgpr30 = SI_RESTORE_S32_FROM_VGPR $vgpr63, 0
+    SI_RETURN implicit $vgpr0
+
+...

diff  --git a/llvm/test/MachineVerifier/AMDGPU/issue98474-missing-def-liveout-physical-subregister.mir b/llvm/test/MachineVerifier/AMDGPU/issue98474-missing-def-liveout-physical-subregister.mir
new file mode 100644
index 00000000000000..892a4298bbdb51
--- /dev/null
+++ b/llvm/test/MachineVerifier/AMDGPU/issue98474-missing-def-liveout-physical-subregister.mir
@@ -0,0 +1,36 @@
+# XFAIL: *
+# RUN: not --crash llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -run-pass=none -filetype=null %s
+
+# FIXME: This should fail the machine verifier. There is a missing def
+# of $vgpr2 in bb.1, which is needed since it's live into bb.3
+
+---
+name: missing_live_out_subreg_def
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $sgpr0, $vgpr0
+
+    S_CMP_EQ_U32 $sgpr0, 0, implicit-def $scc
+    S_CBRANCH_SCC0 %bb.2, implicit killed $scc
+
+  bb.1:
+    liveins: $vgpr0
+
+    renamable $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+    S_BRANCH %bb.3
+
+  bb.2:
+    liveins: $vgpr0
+
+    renamable $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
+    renamable $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+    renamable $vgpr1_vgpr2 = BUFFER_LOAD_FORMAT_XY_IDXEN killed renamable $vgpr1, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), addrspace 8)
+
+  bb.3:
+    liveins: $vgpr0, $vgpr1_vgpr2
+
+    EXP 0, killed renamable $vgpr0, killed renamable $vgpr1, renamable $vgpr2, renamable $vgpr0, 0, 0, 0, implicit $exec
+    S_ENDPGM 0
+
+...


        


More information about the llvm-commits mailing list