[llvm] VirtRegRewriter: Add super register defs for live out undef lanes (PR #112679)

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Mon Oct 21 16:37:19 PDT 2024


https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/112679

>From 7a2b0b57e4d4c61b160abaae210938c1e44ba438 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Sat, 17 Aug 2024 10:36:09 +0400
Subject: [PATCH] VirtRegRewriter: Add super register defs for live out undef
 lanes

If an undef subregister def is live into another block, we need to
maintain a physreg def to track the liveness of those lanes. This
would manifest as a verifier error after branch folding, when the use in
the cloned tail block no longer had a def.

There is a missing verifier check for this situation. Added an xfailed
test that demonstrates this. We may also be able to revert the changes
in 47d3cbcf842a036c20c3f1c74255cdfc213f41c2.

It might be better to insert an IMPLICIT_DEF before the instruction
rather than using the implicit-def operand.

Fixes #98474
---
 llvm/lib/CodeGen/VirtRegMap.cpp               |  28 ++
 .../branch-folding-implicit-def-subreg.ll     |  65 +++--
 llvm/test/CodeGen/AMDGPU/indirect-call.ll     |   2 -
 ...nfloop-subrange-spill-inspect-subrange.mir |   4 +-
 .../CodeGen/AMDGPU/infloop-subrange-spill.mir |   4 +-
 ...474-need-live-out-undef-subregister-def.ll |  42 +++
 ...egrewriter-live-out-undef-subregisters.mir | 251 ++++++++++++++++++
 llvm/test/CodeGen/AMDGPU/itofp.i128.ll        |   8 +-
 .../test/CodeGen/AMDGPU/vni8-across-blocks.ll |   2 +-
 ...ssing-def-liveout-physical-subregister.mir |  36 +++
 10 files changed, 396 insertions(+), 46 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/issue98474-need-live-out-undef-subregister-def.ll
 create mode 100644 llvm/test/CodeGen/AMDGPU/issue98474-virtregrewriter-live-out-undef-subregisters.mir
 create mode 100644 llvm/test/MachineVerifier/AMDGPU/issue98474-missing-def-liveout-physical-subregister.mir

diff --git a/llvm/lib/CodeGen/VirtRegMap.cpp b/llvm/lib/CodeGen/VirtRegMap.cpp
index a548bf665bcea85..1254c7be1821466 100644
--- a/llvm/lib/CodeGen/VirtRegMap.cpp
+++ b/llvm/lib/CodeGen/VirtRegMap.cpp
@@ -199,6 +199,9 @@ class VirtRegRewriter : public MachineFunctionPass {
   void handleIdentityCopy(MachineInstr &MI);
   void expandCopyBundle(MachineInstr &MI) const;
   bool subRegLiveThrough(const MachineInstr &MI, MCRegister SuperPhysReg) const;
+  bool needLiveOutUndefSubregDef(const LiveInterval &LI,
+                                 const MachineBasicBlock &MBB, unsigned SubReg,
+                                 MCPhysReg PhysReg) const;
 
 public:
   static char ID;
@@ -532,6 +535,26 @@ bool VirtRegRewriter::subRegLiveThrough(const MachineInstr &MI,
   return false;
 }
 
+/// Return true if a lane of \p LI outside \p SubReg (left undef by the def) is
+/// live into a successor of \p MBB, so a def must be kept to track liveness.
+bool VirtRegRewriter::needLiveOutUndefSubregDef(const LiveInterval &LI,
+                                                const MachineBasicBlock &MBB,
+                                                unsigned SubReg,
+                                                MCPhysReg PhysReg) const {
+  LaneBitmask UndefMask = ~TRI->getSubRegIndexLaneMask(SubReg);
+  for (const LiveInterval::SubRange &SR : LI.subranges()) {
+    LaneBitmask NeedImpDefLanes = UndefMask & SR.LaneMask; // SR's undef lanes.
+    if (NeedImpDefLanes.any() && !LIS->isLiveOutOfMBB(SR, &MBB)) {
+      for (const MachineBasicBlock *Succ : MBB.successors()) {
+        if (LIS->isLiveInToMBB(SR, Succ))
+          return true; // Not live out of MBB, yet live into a successor.
+      }
+    }
+  }
+
+  return false; // No subrange met both conditions.
+}
+
 void VirtRegRewriter::rewrite() {
   bool NoSubRegLiveness = !MRI->subRegLivenessEnabled();
   SmallVector<Register, 8> SuperDeads;
@@ -586,6 +609,11 @@ void VirtRegRewriter::rewrite() {
                 MO.setIsUndef(true);
             } else if (!MO.isDead()) {
               assert(MO.isDef());
+              if (MO.isUndef()) {
+                const LiveInterval &LI = LIS->getInterval(VirtReg);
+                if (needLiveOutUndefSubregDef(LI, *MBBI, SubReg, PhysReg))
+                  SuperDefs.push_back(PhysReg);
+              }
             }
           }
 
diff --git a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
index 862543299239717..055e9850de3d68a 100644
--- a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
@@ -38,24 +38,19 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT:   renamable $sgpr30_sgpr31 = S_MOV_B64 0
   ; GFX90A-NEXT:   renamable $vcc = S_AND_B64 $exec, renamable $sgpr26_sgpr27, implicit-def dead $scc
-  ; GFX90A-NEXT:   $vgpr22 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   $vgpr10 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   $vgpr24 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   $vgpr18 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   $vgpr20 = IMPLICIT_DEF
   ; GFX90A-NEXT:   S_CBRANCH_VCCNZ %bb.59, implicit $vcc
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.2:
   ; GFX90A-NEXT:   successors: %bb.3(0x80000000)
-  ; GFX90A-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr22, $sgpr33, $vgpr31, $sgpr4_sgpr5, $sgpr6, $sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr30_sgpr31, $sgpr42_sgpr43, $sgpr54, $sgpr55, $sgpr16_sgpr17_sgpr18, $sgpr18_sgpr19, $sgpr20_sgpr21_sgpr22, $vgpr2, $vgpr3, $vgpr10, $vgpr24, $vgpr18, $vgpr20
+  ; GFX90A-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr33, $vgpr31, $sgpr4_sgpr5, $sgpr6, $sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr30_sgpr31, $sgpr42_sgpr43, $sgpr54, $sgpr55, $sgpr16_sgpr17_sgpr18, $sgpr18_sgpr19, $sgpr20_sgpr21_sgpr22, $vgpr2, $vgpr3
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT:   renamable $sgpr15 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $sgpr23 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr19 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr21 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr23 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr25 = IMPLICIT_DEF
+  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
+  ; GFX90A-NEXT:   renamable $vgpr19 = IMPLICIT_DEF implicit-def $vgpr18
+  ; GFX90A-NEXT:   renamable $vgpr21 = IMPLICIT_DEF implicit-def $vgpr20
+  ; GFX90A-NEXT:   renamable $vgpr23 = IMPLICIT_DEF implicit-def $vgpr22
+  ; GFX90A-NEXT:   renamable $vgpr25 = IMPLICIT_DEF implicit-def $vgpr24
   ; GFX90A-NEXT:   renamable $sgpr28_sgpr29 = S_MOV_B64 0
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.3.Flow17:
@@ -111,8 +106,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT:   renamable $vgpr52 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr16 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr53 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF
+  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
+  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
   ; GFX90A-NEXT:   renamable $sgpr15 = IMPLICIT_DEF
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.6.Flow20:
@@ -395,8 +390,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT:   renamable $vgpr52 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr16 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr53 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF
+  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
+  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
   ; GFX90A-NEXT:   renamable $sgpr15 = IMPLICIT_DEF
   ; GFX90A-NEXT:   $sgpr30_sgpr31 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
   ; GFX90A-NEXT:   S_CBRANCH_EXECNZ %bb.37, implicit $exec
@@ -434,8 +429,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT:   renamable $vgpr52 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr16 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr53 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF
+  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
+  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
   ; GFX90A-NEXT:   renamable $sgpr15 = IMPLICIT_DEF
   ; GFX90A-NEXT:   $sgpr36_sgpr37 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
   ; GFX90A-NEXT:   S_CBRANCH_EXECNZ %bb.39, implicit $exec
@@ -484,8 +479,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT:   renamable $vgpr52 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr16 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr53 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF
+  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
+  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
   ; GFX90A-NEXT:   renamable $sgpr15 = IMPLICIT_DEF
   ; GFX90A-NEXT:   $sgpr38_sgpr39 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
   ; GFX90A-NEXT:   S_CBRANCH_EXECNZ %bb.41, implicit $exec
@@ -535,8 +530,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT:   renamable $vgpr52 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr16 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr53 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF
+  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
+  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
   ; GFX90A-NEXT:   renamable $sgpr15 = IMPLICIT_DEF
   ; GFX90A-NEXT:   $sgpr40_sgpr41 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
   ; GFX90A-NEXT:   S_CBRANCH_EXECNZ %bb.47, implicit $exec
@@ -589,8 +584,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT:   renamable $vgpr52 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr16 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr53 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF
+  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
+  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
   ; GFX90A-NEXT:   renamable $sgpr15 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $sgpr44_sgpr45 = S_MOV_B64 0
   ; GFX90A-NEXT: {{  $}}
@@ -643,8 +638,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT:   renamable $vgpr52 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr16 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr53 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF
+  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
+  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
   ; GFX90A-NEXT:   renamable $sgpr15 = IMPLICIT_DEF
   ; GFX90A-NEXT:   $sgpr16_sgpr17 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
   ; GFX90A-NEXT:   S_CBRANCH_EXECNZ %bb.43, implicit $exec
@@ -689,8 +684,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT:   renamable $vgpr52 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr16 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr53 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF
+  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
+  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
   ; GFX90A-NEXT:   renamable $sgpr15 = IMPLICIT_DEF
   ; GFX90A-NEXT:   S_BRANCH %bb.45
   ; GFX90A-NEXT: {{  $}}
@@ -719,8 +714,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT:   renamable $vgpr52 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr16 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr53 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF
+  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
+  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
   ; GFX90A-NEXT:   renamable $sgpr15 = IMPLICIT_DEF
   ; GFX90A-NEXT:   S_BRANCH %bb.46
   ; GFX90A-NEXT: {{  $}}
@@ -748,8 +743,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT:   renamable $vgpr52 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr16 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr53 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF
+  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
+  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
   ; GFX90A-NEXT:   renamable $sgpr15 = IMPLICIT_DEF
   ; GFX90A-NEXT:   S_BRANCH %bb.62
   ; GFX90A-NEXT: {{  $}}
@@ -773,8 +768,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT:   renamable $vgpr52 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr16 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr53 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF
+  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
+  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
   ; GFX90A-NEXT:   renamable $sgpr15 = IMPLICIT_DEF
   ; GFX90A-NEXT:   $sgpr58_sgpr59 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
   ; GFX90A-NEXT:   S_CBRANCH_EXECNZ %bb.53, implicit $exec
@@ -880,8 +875,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT:   renamable $vgpr52 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr16 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr53 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF
+  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
+  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
   ; GFX90A-NEXT:   $sgpr50_sgpr51 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
   ; GFX90A-NEXT:   S_CBRANCH_EXECNZ %bb.57, implicit $exec
   ; GFX90A-NEXT: {{  $}}
diff --git a/llvm/test/CodeGen/AMDGPU/indirect-call.ll b/llvm/test/CodeGen/AMDGPU/indirect-call.ll
index da8aa5446983558..e819d5d3b1656e0 100644
--- a/llvm/test/CodeGen/AMDGPU/indirect-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/indirect-call.ll
@@ -603,7 +603,6 @@ define i32 @test_indirect_call_vgpr_ptr_ret(ptr %fptr) {
 ; GISEL-NEXT:    s_mov_b32 s14, s43
 ; GISEL-NEXT:    s_mov_b32 s15, s42
 ; GISEL-NEXT:    s_swappc_b64 s[30:31], s[16:17]
-; GISEL-NEXT:    v_mov_b32_e32 v1, v0
 ; GISEL-NEXT:    ; implicit-def: $vgpr0
 ; GISEL-NEXT:    ; implicit-def: $vgpr31
 ; GISEL-NEXT:    s_xor_b64 exec, exec, s[48:49]
@@ -1384,7 +1383,6 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_return(i32 %i, ptr %fptr) {
 ; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, s[8:9], v[1:2]
 ; GISEL-NEXT:    s_and_saveexec_b64 s[6:7], vcc
 ; GISEL-NEXT:    s_swappc_b64 s[30:31], s[8:9]
-; GISEL-NEXT:    v_mov_b32_e32 v2, v0
 ; GISEL-NEXT:    ; implicit-def: $vgpr1
 ; GISEL-NEXT:    ; implicit-def: $vgpr0
 ; GISEL-NEXT:    s_xor_b64 exec, exec, s[6:7]
diff --git a/llvm/test/CodeGen/AMDGPU/infloop-subrange-spill-inspect-subrange.mir b/llvm/test/CodeGen/AMDGPU/infloop-subrange-spill-inspect-subrange.mir
index 7864564d2891785..285e7e22264a041 100644
--- a/llvm/test/CodeGen/AMDGPU/infloop-subrange-spill-inspect-subrange.mir
+++ b/llvm/test/CodeGen/AMDGPU/infloop-subrange-spill-inspect-subrange.mir
@@ -30,7 +30,7 @@ body:             |
   ; CHECK-NEXT:   dead [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
   ; CHECK-NEXT:   dead undef [[DEF2:%[0-9]+]].sub0:vreg_64 = IMPLICIT_DEF
   ; CHECK-NEXT:   renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX16_IMM renamable $sgpr4_sgpr5, 0, 0 :: (invariant load (s512), align 32, addrspace 4)
-  ; CHECK-NEXT:   renamable $sgpr24 = IMPLICIT_DEF
+  ; CHECK-NEXT:   renamable $sgpr24 = IMPLICIT_DEF implicit-def $sgpr24_sgpr25_sgpr26_sgpr27
   ; CHECK-NEXT:   renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = S_LOAD_DWORDX16_IMM undef renamable $sgpr4_sgpr5, 0, 0 :: (invariant load (s512), align 32, addrspace 4)
   ; CHECK-NEXT:   $exec = S_MOV_B64_term undef renamable $sgpr4_sgpr5
   ; CHECK-NEXT:   S_CBRANCH_EXECZ %bb.6, implicit $exec
@@ -83,7 +83,7 @@ body:             |
   ; CHECK-NEXT:   liveins: $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19:0x000000000000FFFF, $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51:0x00000000FFFFFFFF
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   dead [[IMAGE_SAMPLE_LZ_V1_V2_5:%[0-9]+]]:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2 undef [[DEF]], renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51, undef renamable $sgpr8_sgpr9_sgpr10_sgpr11, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 8)
-  ; CHECK-NEXT:   renamable $sgpr25 = COPY undef renamable $sgpr24
+  ; CHECK-NEXT:   renamable $sgpr25 = COPY undef renamable $sgpr24, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27
   ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.7, implicit undef $vcc
   ; CHECK-NEXT:   S_BRANCH %bb.6
   ; CHECK-NEXT: {{  $}}
diff --git a/llvm/test/CodeGen/AMDGPU/infloop-subrange-spill.mir b/llvm/test/CodeGen/AMDGPU/infloop-subrange-spill.mir
index 1030cdb1b43fc13..995a5d267fbed15 100644
--- a/llvm/test/CodeGen/AMDGPU/infloop-subrange-spill.mir
+++ b/llvm/test/CodeGen/AMDGPU/infloop-subrange-spill.mir
@@ -30,7 +30,7 @@ body:             |
   ; CHECK-NEXT:   dead undef [[DEF3:%[0-9]+]].sub1:vreg_64 = IMPLICIT_DEF
   ; CHECK-NEXT:   dead renamable $sgpr5 = IMPLICIT_DEF
   ; CHECK-NEXT:   renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX16_IMM undef renamable $sgpr4_sgpr5, 0, 0 :: (invariant load (s512), align 32, addrspace 4)
-  ; CHECK-NEXT:   renamable $sgpr24 = IMPLICIT_DEF
+  ; CHECK-NEXT:   renamable $sgpr24 = IMPLICIT_DEF implicit-def $sgpr24_sgpr25_sgpr26_sgpr27
   ; CHECK-NEXT:   renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = S_LOAD_DWORDX16_IMM undef renamable $sgpr4_sgpr5, 0, 0 :: (invariant load (s512), align 32, addrspace 4)
   ; CHECK-NEXT:   $exec = S_MOV_B64_term undef renamable $sgpr4_sgpr5
   ; CHECK-NEXT:   S_CBRANCH_EXECZ %bb.5, implicit $exec
@@ -80,7 +80,7 @@ body:             |
   ; CHECK-NEXT:   liveins: $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19:0x000000000000FFFF, $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51:0x00000000FFFFFFFF
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   dead [[IMAGE_SAMPLE_LZ_V1_V2_5:%[0-9]+]]:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2 undef [[DEF]], renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51, undef renamable $sgpr8_sgpr9_sgpr10_sgpr11, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 8)
-  ; CHECK-NEXT:   renamable $sgpr25 = COPY undef renamable $sgpr24
+  ; CHECK-NEXT:   renamable $sgpr25 = COPY undef renamable $sgpr24, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27
   ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.6, implicit undef $vcc
   ; CHECK-NEXT:   S_BRANCH %bb.5
   ; CHECK-NEXT: {{  $}}
diff --git a/llvm/test/CodeGen/AMDGPU/issue98474-need-live-out-undef-subregister-def.ll b/llvm/test/CodeGen/AMDGPU/issue98474-need-live-out-undef-subregister-def.ll
new file mode 100644
index 000000000000000..7caa563d8b29830
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/issue98474-need-live-out-undef-subregister-def.ll
@@ -0,0 +1,42 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck %s
+
+; Check for verifier error after tail duplication. An implicit_def of
+; a subregister is needed to maintain liveness after assignment.
+
+define amdgpu_vs void @test(i32 inreg %cmp, i32 %e0) {
+; CHECK-LABEL: test:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    s_cmp_eq_u32 s0, 0
+; CHECK-NEXT:    s_mov_b32 s0, 0
+; CHECK-NEXT:    s_cbranch_scc1 .LBB0_2
+; CHECK-NEXT:  ; %bb.1: ; %load
+; CHECK-NEXT:    s_mov_b32 s1, s0
+; CHECK-NEXT:    s_mov_b32 s2, s0
+; CHECK-NEXT:    s_mov_b32 s3, s0
+; CHECK-NEXT:    v_mov_b32_e32 v1, 0
+; CHECK-NEXT:    buffer_load_format_xy v[1:2], v1, s[0:3], 0 idxen
+; CHECK-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-NEXT:    exp mrt0 v0, v1, v2, v0
+; CHECK-NEXT:    s_endpgm
+; CHECK-NEXT:  .LBB0_2:
+; CHECK-NEXT:    v_mov_b32_e32 v1, 0
+; CHECK-NEXT:    exp mrt0 v0, v1, v2, v0
+; CHECK-NEXT:    s_endpgm
+entry:
+  %cond = icmp eq i32 %cmp, 0
+  br i1 %cond, label %end, label %load
+
+load:
+  %data1 = call <2 x i32> @llvm.amdgcn.struct.buffer.load.format.v2i32(<4 x i32> zeroinitializer, i32 0, i32 0, i32 0, i32 0)
+  %e1 = extractelement <2 x i32> %data1, i32 0
+  %e2 = extractelement <2 x i32> %data1, i32 1
+  br label %end
+
+end:
+  %out1 = phi i32 [ 0, %entry ], [ %e1, %load ]
+  %out2 = phi i32 [ poison, %entry ], [ %e2, %load ]
+  call void @llvm.amdgcn.exp.i32(i32 0, i32 15, i32 %e0, i32 %out1, i32 %out2, i32 %e0, i1 false, i1 false)
+  ret void
+}
+
diff --git a/llvm/test/CodeGen/AMDGPU/issue98474-virtregrewriter-live-out-undef-subregisters.mir b/llvm/test/CodeGen/AMDGPU/issue98474-virtregrewriter-live-out-undef-subregisters.mir
new file mode 100644
index 000000000000000..a8ed114f8cd7835
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/issue98474-virtregrewriter-live-out-undef-subregisters.mir
@@ -0,0 +1,251 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -start-before=greedy,2 -stop-after=tailduplication -verify-machineinstrs -o - %s | FileCheck %s
+
+# The partial def of %0 introduces a live out undef def of %0.sub1
+# into bb.3. We need to maintain this liveness with an explicit def of
+# the physical subregister. Without this, a verifier error would
+# appear after tail duplication.
+
+---
+name:            undef_subreg_def_live_out_tailduplicate_vreg64_undef_sub1
+tracksRegLiveness: true
+machineFunctionInfo:
+  isEntryFunction: true
+  scratchRSrcReg:  '$sgpr96_sgpr97_sgpr98_sgpr99'
+  stackPtrOffsetReg: '$sgpr32'
+body:             |
+  ; CHECK-LABEL: name: undef_subreg_def_live_out_tailduplicate_vreg64_undef_sub1
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT:   liveins: $sgpr0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_CMP_EQ_U32 $sgpr0, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_CBRANCH_SCC0 %bb.2, implicit killed $scc
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec, implicit-def $vgpr0_vgpr1
+  ; CHECK-NEXT:   EXP 0, killed renamable $vgpr0, renamable $vgpr1, undef renamable $vgpr0, undef renamable $vgpr0, 0, 0, 0, implicit $exec
+  ; CHECK-NEXT:   S_ENDPGM 0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   S_NOP 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7
+  ; CHECK-NEXT:   renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+  ; CHECK-NEXT:   renamable $vgpr0_vgpr1 = BUFFER_LOAD_FORMAT_XY_IDXEN killed renamable $vgpr0, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), addrspace 8)
+  ; CHECK-NEXT:   EXP 0, killed renamable $vgpr0, renamable $vgpr1, undef renamable $vgpr0, undef renamable $vgpr0, 0, 0, 0, implicit $exec
+  ; CHECK-NEXT:   S_ENDPGM 0
+  bb.0:
+    liveins: $sgpr0
+
+    S_CMP_EQ_U32 killed $sgpr0, 0, implicit-def $scc
+    S_CBRANCH_SCC0 %bb.2, implicit killed $scc
+
+  bb.1:
+    undef %0.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec
+    S_BRANCH %bb.3
+
+  bb.2:
+    S_NOP 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7
+    %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    %0:vreg_64 = BUFFER_LOAD_FORMAT_XY_IDXEN killed %1, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), addrspace 8)
+
+  bb.3:
+    EXP 0, killed %0.sub0, killed %0.sub1, undef %2:vgpr_32, undef %2:vgpr_32, 0, 0, 0, implicit $exec
+    S_ENDPGM 0
+
+...
+
+---
+name:            undef_subreg_def_live_out_tailduplicate_vreg96_undef_sub1_sub2
+tracksRegLiveness: true
+machineFunctionInfo:
+  isEntryFunction: true
+  scratchRSrcReg:  '$sgpr96_sgpr97_sgpr98_sgpr99'
+  stackPtrOffsetReg: '$sgpr32'
+body:             |
+  ; CHECK-LABEL: name: undef_subreg_def_live_out_tailduplicate_vreg96_undef_sub1_sub2
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT:   liveins: $sgpr0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_CMP_EQ_U32 $sgpr0, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_CBRANCH_SCC0 %bb.2, implicit killed $scc
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2
+  ; CHECK-NEXT:   EXP 0, killed renamable $vgpr0, renamable $vgpr1, renamable $vgpr2, undef renamable $vgpr0, 0, 0, 0, implicit $exec
+  ; CHECK-NEXT:   S_ENDPGM 0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   S_NOP 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7
+  ; CHECK-NEXT:   renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+  ; CHECK-NEXT:   renamable $vgpr0_vgpr1_vgpr2 = BUFFER_LOAD_FORMAT_XYZ_IDXEN killed renamable $vgpr0, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 8)
+  ; CHECK-NEXT:   EXP 0, killed renamable $vgpr0, renamable $vgpr1, renamable $vgpr2, undef renamable $vgpr0, 0, 0, 0, implicit $exec
+  ; CHECK-NEXT:   S_ENDPGM 0
+  bb.0:
+    liveins: $sgpr0
+
+    S_CMP_EQ_U32 killed $sgpr0, 0, implicit-def $scc
+    S_CBRANCH_SCC0 %bb.2, implicit killed $scc
+
+  bb.1:
+    undef %0.sub0:vreg_96 = V_MOV_B32_e32 0, implicit $exec
+    S_BRANCH %bb.3
+
+  bb.2:
+    S_NOP 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7
+    %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    %0:vreg_96 = BUFFER_LOAD_FORMAT_XYZ_IDXEN killed %1, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), addrspace 8)
+
+  bb.3:
+    EXP 0, killed %0.sub0, killed %0.sub1, killed %0.sub2, undef %2:vgpr_32, 0, 0, 0, implicit $exec
+    S_ENDPGM 0
+
+...
+
+---
+name:            undef_subreg_def_live_out_tailduplicate_vreg96_undef_sub0_sub2
+tracksRegLiveness: true
+machineFunctionInfo:
+  isEntryFunction: true
+  scratchRSrcReg:  '$sgpr96_sgpr97_sgpr98_sgpr99'
+  stackPtrOffsetReg: '$sgpr32'
+body:             |
+  ; CHECK-LABEL: name: undef_subreg_def_live_out_tailduplicate_vreg96_undef_sub0_sub2
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT:   liveins: $sgpr0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_CMP_EQ_U32 $sgpr0, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_CBRANCH_SCC0 %bb.2, implicit killed $scc
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   renamable $vgpr1 = V_MOV_B32_e32 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2
+  ; CHECK-NEXT:   EXP 0, killed renamable $vgpr0, renamable $vgpr1, renamable $vgpr2, undef renamable $vgpr0, 0, 0, 0, implicit $exec
+  ; CHECK-NEXT:   S_ENDPGM 0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   S_NOP 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7
+  ; CHECK-NEXT:   renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+  ; CHECK-NEXT:   renamable $vgpr0_vgpr1_vgpr2 = BUFFER_LOAD_FORMAT_XYZ_IDXEN killed renamable $vgpr0, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 8)
+  ; CHECK-NEXT:   EXP 0, killed renamable $vgpr0, renamable $vgpr1, renamable $vgpr2, undef renamable $vgpr0, 0, 0, 0, implicit $exec
+  ; CHECK-NEXT:   S_ENDPGM 0
+  bb.0:
+    liveins: $sgpr0
+
+    S_CMP_EQ_U32 killed $sgpr0, 0, implicit-def $scc
+    S_CBRANCH_SCC0 %bb.2, implicit killed $scc
+
+  bb.1:
+    undef %0.sub1:vreg_96 = V_MOV_B32_e32 0, implicit $exec
+    S_BRANCH %bb.3
+
+  bb.2:
+    S_NOP 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7
+    %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    %0:vreg_96 = BUFFER_LOAD_FORMAT_XYZ_IDXEN killed %1, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), addrspace 8)
+
+  bb.3:
+    EXP 0, killed %0.sub0, killed %0.sub1, killed %0.sub2, undef %2:vgpr_32, 0, 0, 0, implicit $exec
+    S_ENDPGM 0
+
+...
+
+# Test another use of the value before the block end.
+---
+name:            undef_subreg_def_live_out_tailduplicate_vreg64_undef_sub1_undef_use_in_def_block
+tracksRegLiveness: true
+machineFunctionInfo:
+  isEntryFunction: true
+  scratchRSrcReg:  '$sgpr96_sgpr97_sgpr98_sgpr99'
+  stackPtrOffsetReg: '$sgpr32'
+body:             |
+  ; CHECK-LABEL: name: undef_subreg_def_live_out_tailduplicate_vreg64_undef_sub1_undef_use_in_def_block
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT:   liveins: $sgpr0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_CMP_EQ_U32 $sgpr0, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_CBRANCH_SCC0 %bb.2, implicit killed $scc
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec, implicit-def $vgpr0_vgpr1
+  ; CHECK-NEXT:   S_NOP 0, implicit renamable $vgpr0_vgpr1
+  ; CHECK-NEXT:   EXP 0, killed renamable $vgpr0, renamable $vgpr1, undef renamable $vgpr0, undef renamable $vgpr0, 0, 0, 0, implicit $exec
+  ; CHECK-NEXT:   S_ENDPGM 0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   S_NOP 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7
+  ; CHECK-NEXT:   renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+  ; CHECK-NEXT:   renamable $vgpr0_vgpr1 = BUFFER_LOAD_FORMAT_XY_IDXEN killed renamable $vgpr0, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), addrspace 8)
+  ; CHECK-NEXT:   EXP 0, killed renamable $vgpr0, renamable $vgpr1, undef renamable $vgpr0, undef renamable $vgpr0, 0, 0, 0, implicit $exec
+  ; CHECK-NEXT:   S_ENDPGM 0
+  bb.0:
+    liveins: $sgpr0
+
+    S_CMP_EQ_U32 killed $sgpr0, 0, implicit-def $scc
+    S_CBRANCH_SCC0 %bb.2, implicit killed $scc
+
+  bb.1:
+    undef %0.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec
+    S_NOP 0, implicit %0
+    S_BRANCH %bb.3
+
+  bb.2:
+    S_NOP 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7
+    %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    %0:vreg_64 = BUFFER_LOAD_FORMAT_XY_IDXEN killed %1, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), addrspace 8)
+
+  bb.3:
+    EXP 0, killed %0.sub0, killed %0.sub1, undef %2:vgpr_32, undef %2:vgpr_32, 0, 0, 0, implicit $exec
+    S_ENDPGM 0
+
+...
+
+# The undef subregister is not live out, so no implicit def should be added for it
+---
+name:            undef_subreg_def_live_out_tailduplicate_vreg64_undef_sub1_no_phi_use
+tracksRegLiveness: true
+machineFunctionInfo:
+  isEntryFunction: true
+  scratchRSrcReg:  '$sgpr96_sgpr97_sgpr98_sgpr99'
+  stackPtrOffsetReg: '$sgpr32'
+body:             |
+  ; CHECK-LABEL: name: undef_subreg_def_live_out_tailduplicate_vreg64_undef_sub1_no_phi_use
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT:   liveins: $sgpr0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_CMP_EQ_U32 $sgpr0, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_CBRANCH_SCC0 %bb.2, implicit killed $scc
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+  ; CHECK-NEXT:   EXP 0, killed renamable $vgpr0, renamable $vgpr0, undef renamable $vgpr0, undef renamable $vgpr0, 0, 0, 0, implicit $exec
+  ; CHECK-NEXT:   S_ENDPGM 0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   S_NOP 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7
+  ; CHECK-NEXT:   renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+  ; CHECK-NEXT:   renamable $vgpr0_vgpr1 = BUFFER_LOAD_FORMAT_XY_IDXEN killed renamable $vgpr0, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), addrspace 8)
+  ; CHECK-NEXT:   EXP 0, killed renamable $vgpr0, renamable $vgpr0, undef renamable $vgpr0, undef renamable $vgpr0, 0, 0, 0, implicit $exec
+  ; CHECK-NEXT:   S_ENDPGM 0
+  bb.0:
+    liveins: $sgpr0
+
+    S_CMP_EQ_U32 killed $sgpr0, 0, implicit-def $scc
+    S_CBRANCH_SCC0 %bb.2, implicit killed $scc
+
+  bb.1:
+    undef %0.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec
+    S_BRANCH %bb.3
+
+  bb.2:
+    S_NOP 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7
+    %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    %0:vreg_64 = BUFFER_LOAD_FORMAT_XY_IDXEN killed %1, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), addrspace 8)
+
+  bb.3:
+    EXP 0, killed %0.sub0, killed %0.sub0, undef %2:vgpr_32, undef %2:vgpr_32, 0, 0, 0, implicit $exec
+    S_ENDPGM 0
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/itofp.i128.ll b/llvm/test/CodeGen/AMDGPU/itofp.i128.ll
index 2999ddb8315883e..7e76e2bf9e89494 100644
--- a/llvm/test/CodeGen/AMDGPU/itofp.i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/itofp.i128.ll
@@ -172,8 +172,8 @@ define float @sitofp_i128_to_f32(i128 %x) {
 ; GISEL-NEXT:    v_lshlrev_b64 v[0:1], v2, v[0:1]
 ; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
 ; GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v0, vcc
-; GISEL-NEXT:    ; implicit-def: $vgpr8
 ; GISEL-NEXT:    ; implicit-def: $vgpr0
+; GISEL-NEXT:    ; implicit-def: $vgpr8
 ; GISEL-NEXT:    ; implicit-def: $vgpr5
 ; GISEL-NEXT:    ; implicit-def: $vgpr2
 ; GISEL-NEXT:  ; %bb.3: ; %Flow3
@@ -417,8 +417,8 @@ define float @uitofp_i128_to_f32(i128 %x) {
 ; GISEL-NEXT:    v_lshlrev_b64 v[0:1], v2, v[0:1]
 ; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
 ; GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v0, vcc
-; GISEL-NEXT:    ; implicit-def: $vgpr7
 ; GISEL-NEXT:    ; implicit-def: $vgpr0
+; GISEL-NEXT:    ; implicit-def: $vgpr7
 ; GISEL-NEXT:    ; implicit-def: $vgpr5
 ; GISEL-NEXT:    ; implicit-def: $vgpr2
 ; GISEL-NEXT:  ; %bb.3: ; %Flow3
@@ -1263,8 +1263,8 @@ define half @sitofp_i128_to_f16(i128 %x) {
 ; GISEL-NEXT:    v_lshlrev_b64 v[0:1], v2, v[0:1]
 ; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
 ; GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v0, vcc
-; GISEL-NEXT:    ; implicit-def: $vgpr8
 ; GISEL-NEXT:    ; implicit-def: $vgpr0
+; GISEL-NEXT:    ; implicit-def: $vgpr8
 ; GISEL-NEXT:    ; implicit-def: $vgpr5
 ; GISEL-NEXT:    ; implicit-def: $vgpr2
 ; GISEL-NEXT:  ; %bb.3: ; %Flow3
@@ -1510,8 +1510,8 @@ define half @uitofp_i128_to_f16(i128 %x) {
 ; GISEL-NEXT:    v_lshlrev_b64 v[0:1], v2, v[0:1]
 ; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
 ; GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v0, vcc
-; GISEL-NEXT:    ; implicit-def: $vgpr7
 ; GISEL-NEXT:    ; implicit-def: $vgpr0
+; GISEL-NEXT:    ; implicit-def: $vgpr7
 ; GISEL-NEXT:    ; implicit-def: $vgpr5
 ; GISEL-NEXT:    ; implicit-def: $vgpr2
 ; GISEL-NEXT:  ; %bb.3: ; %Flow3
diff --git a/llvm/test/CodeGen/AMDGPU/vni8-across-blocks.ll b/llvm/test/CodeGen/AMDGPU/vni8-across-blocks.ll
index 2d5e5a9160fdf7d..6c56dee76142c15 100644
--- a/llvm/test/CodeGen/AMDGPU/vni8-across-blocks.ll
+++ b/llvm/test/CodeGen/AMDGPU/vni8-across-blocks.ll
@@ -526,7 +526,6 @@ define amdgpu_kernel void @v8i8_phi_const(ptr addrspace(1) %src1, ptr addrspace(
 ; GFX906-NEXT:    v_lshlrev_b32_e32 v4, 3, v0
 ; GFX906-NEXT:    v_cmp_lt_u32_e64 s[0:1], 14, v0
 ; GFX906-NEXT:    v_cmp_gt_u32_e32 vcc, 15, v0
-; GFX906-NEXT:    ; implicit-def: $vgpr3
 ; GFX906-NEXT:    ; implicit-def: $vgpr13
 ; GFX906-NEXT:    ; implicit-def: $vgpr11
 ; GFX906-NEXT:    ; implicit-def: $vgpr14
@@ -535,6 +534,7 @@ define amdgpu_kernel void @v8i8_phi_const(ptr addrspace(1) %src1, ptr addrspace(
 ; GFX906-NEXT:    ; implicit-def: $vgpr16
 ; GFX906-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX906-NEXT:    global_load_dwordx2 v[1:2], v4, s[4:5]
+; GFX906-NEXT:    ; implicit-def: $vgpr3
 ; GFX906-NEXT:    s_waitcnt vmcnt(0)
 ; GFX906-NEXT:    v_lshrrev_b32_e32 v5, 24, v2
 ; GFX906-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
diff --git a/llvm/test/MachineVerifier/AMDGPU/issue98474-missing-def-liveout-physical-subregister.mir b/llvm/test/MachineVerifier/AMDGPU/issue98474-missing-def-liveout-physical-subregister.mir
new file mode 100644
index 000000000000000..892a4298bbdb518
--- /dev/null
+++ b/llvm/test/MachineVerifier/AMDGPU/issue98474-missing-def-liveout-physical-subregister.mir
@@ -0,0 +1,36 @@
+# XFAIL: *
+# RUN: not --crash llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -run-pass=none -filetype=null %s
+
+# FIXME: This should fail the machine verifier. There is a missing def
+# of $vgpr2 in bb.1, which is needed since it's live into bb.3
+
+---
+name: missing_live_out_subreg_def
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $sgpr0, $vgpr0
+
+    S_CMP_EQ_U32 $sgpr0, 0, implicit-def $scc
+    S_CBRANCH_SCC0 %bb.2, implicit killed $scc
+
+  bb.1:
+    liveins: $vgpr0
+
+    renamable $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+    S_BRANCH %bb.3
+
+  bb.2:
+    liveins: $vgpr0
+
+    renamable $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
+    renamable $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+    renamable $vgpr1_vgpr2 = BUFFER_LOAD_FORMAT_XY_IDXEN killed renamable $vgpr1, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), addrspace 8)
+
+  bb.3:
+    liveins: $vgpr0, $vgpr1_vgpr2
+
+    EXP 0, killed renamable $vgpr0, killed renamable $vgpr1, renamable $vgpr2, renamable $vgpr0, 0, 0, 0, implicit $exec
+    S_ENDPGM 0
+
+...



More information about the llvm-commits mailing list