[llvm] VirtRegRewriter: Add super register defs for live out undef lanes (PR #112679)

via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 17 01:36:50 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-regalloc

Author: Matt Arsenault (arsenm)

<details>
<summary>Changes</summary>

If an undef subregister def is live into another block, we need to
maintain a physreg def to track the liveness of those lanes. This
would manifest a verifier error after branch folding, when the cloned
tail block use no longer had a def.

There is a missing verifier check for this situation. Added an xfailed
test that demonstrates this. We may also be able to revert the changes
in 47d3cbcf842a036c20c3f1c74255cdfc213f41c2.

It might be better to insert an IMPLICIT_DEF before the instruction
rather than using the implicit-def operand.

Fixes #<!-- -->98474

---

Patch is 35.87 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/112679.diff


10 Files Affected:

- (modified) llvm/lib/CodeGen/VirtRegMap.cpp (+28) 
- (modified) llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll (+30-35) 
- (modified) llvm/test/CodeGen/AMDGPU/indirect-call.ll (-2) 
- (modified) llvm/test/CodeGen/AMDGPU/infloop-subrange-spill-inspect-subrange.mir (+2-2) 
- (modified) llvm/test/CodeGen/AMDGPU/infloop-subrange-spill.mir (+2-2) 
- (added) llvm/test/CodeGen/AMDGPU/issue98474-need-live-out-undef-subregister-def.ll (+42) 
- (added) llvm/test/CodeGen/AMDGPU/issue98474-virtregrewriter-live-out-undef-subregisters.mir (+251) 
- (modified) llvm/test/CodeGen/AMDGPU/itofp.i128.ll (+4-4) 
- (modified) llvm/test/CodeGen/AMDGPU/vni8-across-blocks.ll (+1-1) 
- (added) llvm/test/MachineVerifier/AMDGPU/issue98474-missing-def-liveout-physical-subregister.mir (+36) 


``````````diff
diff --git a/llvm/lib/CodeGen/VirtRegMap.cpp b/llvm/lib/CodeGen/VirtRegMap.cpp
index a548bf665bcea8..1254c7be182146 100644
--- a/llvm/lib/CodeGen/VirtRegMap.cpp
+++ b/llvm/lib/CodeGen/VirtRegMap.cpp
@@ -199,6 +199,9 @@ class VirtRegRewriter : public MachineFunctionPass {
   void handleIdentityCopy(MachineInstr &MI);
   void expandCopyBundle(MachineInstr &MI) const;
   bool subRegLiveThrough(const MachineInstr &MI, MCRegister SuperPhysReg) const;
+  bool needLiveOutUndefSubregDef(const LiveInterval &LI,
+                                 const MachineBasicBlock &MBB, unsigned SubReg,
+                                 MCPhysReg PhysReg) const;
 
 public:
   static char ID;
@@ -532,6 +535,26 @@ bool VirtRegRewriter::subRegLiveThrough(const MachineInstr &MI,
   return false;
 }
 
+/// Check if we need to maintain liveness for undef subregister lanes that are
+/// live out of a block.
+bool VirtRegRewriter::needLiveOutUndefSubregDef(const LiveInterval &LI,
+                                                const MachineBasicBlock &MBB,
+                                                unsigned SubReg,
+                                                MCPhysReg PhysReg) const {
+  LaneBitmask UndefMask = ~TRI->getSubRegIndexLaneMask(SubReg);
+  for (const LiveInterval::SubRange &SR : LI.subranges()) {
+    LaneBitmask NeedImpDefLanes = UndefMask & SR.LaneMask;
+    if (NeedImpDefLanes.any() && !LIS->isLiveOutOfMBB(SR, &MBB)) {
+      for (const MachineBasicBlock *Succ : MBB.successors()) {
+        if (LIS->isLiveInToMBB(SR, Succ))
+          return true;
+      }
+    }
+  }
+
+  return false;
+}
+
 void VirtRegRewriter::rewrite() {
   bool NoSubRegLiveness = !MRI->subRegLivenessEnabled();
   SmallVector<Register, 8> SuperDeads;
@@ -586,6 +609,11 @@ void VirtRegRewriter::rewrite() {
                 MO.setIsUndef(true);
             } else if (!MO.isDead()) {
               assert(MO.isDef());
+              if (MO.isUndef()) {
+                const LiveInterval &LI = LIS->getInterval(VirtReg);
+                if (needLiveOutUndefSubregDef(LI, *MBBI, SubReg, PhysReg))
+                  SuperDefs.push_back(PhysReg);
+              }
             }
           }
 
diff --git a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
index 86254329923971..055e9850de3d68 100644
--- a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
@@ -38,24 +38,19 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT:   renamable $sgpr30_sgpr31 = S_MOV_B64 0
   ; GFX90A-NEXT:   renamable $vcc = S_AND_B64 $exec, renamable $sgpr26_sgpr27, implicit-def dead $scc
-  ; GFX90A-NEXT:   $vgpr22 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   $vgpr10 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   $vgpr24 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   $vgpr18 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   $vgpr20 = IMPLICIT_DEF
   ; GFX90A-NEXT:   S_CBRANCH_VCCNZ %bb.59, implicit $vcc
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.2:
   ; GFX90A-NEXT:   successors: %bb.3(0x80000000)
-  ; GFX90A-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr22, $sgpr33, $vgpr31, $sgpr4_sgpr5, $sgpr6, $sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr30_sgpr31, $sgpr42_sgpr43, $sgpr54, $sgpr55, $sgpr16_sgpr17_sgpr18, $sgpr18_sgpr19, $sgpr20_sgpr21_sgpr22, $vgpr2, $vgpr3, $vgpr10, $vgpr24, $vgpr18, $vgpr20
+  ; GFX90A-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr33, $vgpr31, $sgpr4_sgpr5, $sgpr6, $sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr30_sgpr31, $sgpr42_sgpr43, $sgpr54, $sgpr55, $sgpr16_sgpr17_sgpr18, $sgpr18_sgpr19, $sgpr20_sgpr21_sgpr22, $vgpr2, $vgpr3
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT:   renamable $sgpr15 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $sgpr23 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr19 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr21 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr23 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr25 = IMPLICIT_DEF
+  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
+  ; GFX90A-NEXT:   renamable $vgpr19 = IMPLICIT_DEF implicit-def $vgpr18
+  ; GFX90A-NEXT:   renamable $vgpr21 = IMPLICIT_DEF implicit-def $vgpr20
+  ; GFX90A-NEXT:   renamable $vgpr23 = IMPLICIT_DEF implicit-def $vgpr22
+  ; GFX90A-NEXT:   renamable $vgpr25 = IMPLICIT_DEF implicit-def $vgpr24
   ; GFX90A-NEXT:   renamable $sgpr28_sgpr29 = S_MOV_B64 0
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.3.Flow17:
@@ -111,8 +106,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT:   renamable $vgpr52 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr16 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr53 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF
+  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
+  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
   ; GFX90A-NEXT:   renamable $sgpr15 = IMPLICIT_DEF
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.6.Flow20:
@@ -395,8 +390,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT:   renamable $vgpr52 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr16 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr53 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF
+  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
+  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
   ; GFX90A-NEXT:   renamable $sgpr15 = IMPLICIT_DEF
   ; GFX90A-NEXT:   $sgpr30_sgpr31 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
   ; GFX90A-NEXT:   S_CBRANCH_EXECNZ %bb.37, implicit $exec
@@ -434,8 +429,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT:   renamable $vgpr52 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr16 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr53 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF
+  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
+  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
   ; GFX90A-NEXT:   renamable $sgpr15 = IMPLICIT_DEF
   ; GFX90A-NEXT:   $sgpr36_sgpr37 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
   ; GFX90A-NEXT:   S_CBRANCH_EXECNZ %bb.39, implicit $exec
@@ -484,8 +479,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT:   renamable $vgpr52 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr16 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr53 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF
+  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
+  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
   ; GFX90A-NEXT:   renamable $sgpr15 = IMPLICIT_DEF
   ; GFX90A-NEXT:   $sgpr38_sgpr39 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
   ; GFX90A-NEXT:   S_CBRANCH_EXECNZ %bb.41, implicit $exec
@@ -535,8 +530,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT:   renamable $vgpr52 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr16 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr53 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF
+  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
+  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
   ; GFX90A-NEXT:   renamable $sgpr15 = IMPLICIT_DEF
   ; GFX90A-NEXT:   $sgpr40_sgpr41 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
   ; GFX90A-NEXT:   S_CBRANCH_EXECNZ %bb.47, implicit $exec
@@ -589,8 +584,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT:   renamable $vgpr52 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr16 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr53 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF
+  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
+  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
   ; GFX90A-NEXT:   renamable $sgpr15 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $sgpr44_sgpr45 = S_MOV_B64 0
   ; GFX90A-NEXT: {{  $}}
@@ -643,8 +638,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT:   renamable $vgpr52 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr16 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr53 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF
+  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
+  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
   ; GFX90A-NEXT:   renamable $sgpr15 = IMPLICIT_DEF
   ; GFX90A-NEXT:   $sgpr16_sgpr17 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
   ; GFX90A-NEXT:   S_CBRANCH_EXECNZ %bb.43, implicit $exec
@@ -689,8 +684,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT:   renamable $vgpr52 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr16 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr53 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF
+  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
+  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
   ; GFX90A-NEXT:   renamable $sgpr15 = IMPLICIT_DEF
   ; GFX90A-NEXT:   S_BRANCH %bb.45
   ; GFX90A-NEXT: {{  $}}
@@ -719,8 +714,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT:   renamable $vgpr52 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr16 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr53 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF
+  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
+  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
   ; GFX90A-NEXT:   renamable $sgpr15 = IMPLICIT_DEF
   ; GFX90A-NEXT:   S_BRANCH %bb.46
   ; GFX90A-NEXT: {{  $}}
@@ -748,8 +743,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT:   renamable $vgpr52 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr16 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr53 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF
+  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
+  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
   ; GFX90A-NEXT:   renamable $sgpr15 = IMPLICIT_DEF
   ; GFX90A-NEXT:   S_BRANCH %bb.62
   ; GFX90A-NEXT: {{  $}}
@@ -773,8 +768,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT:   renamable $vgpr52 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr16 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr53 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF
+  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
+  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
   ; GFX90A-NEXT:   renamable $sgpr15 = IMPLICIT_DEF
   ; GFX90A-NEXT:   $sgpr58_sgpr59 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
   ; GFX90A-NEXT:   S_CBRANCH_EXECNZ %bb.53, implicit $exec
@@ -880,8 +875,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT:   renamable $vgpr52 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr16 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr53 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF
+  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
+  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
   ; GFX90A-NEXT:   $sgpr50_sgpr51 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
   ; GFX90A-NEXT:   S_CBRANCH_EXECNZ %bb.57, implicit $exec
   ; GFX90A-NEXT: {{  $}}
diff --git a/llvm/test/CodeGen/AMDGPU/indirect-call.ll b/llvm/test/CodeGen/AMDGPU/indirect-call.ll
index da8aa544698355..e819d5d3b1656e 100644
--- a/llvm/test/CodeGen/AMDGPU/indirect-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/indirect-call.ll
@@ -603,7 +603,6 @@ define i32 @test_indirect_call_vgpr_ptr_ret(ptr %fptr) {
 ; GISEL-NEXT:    s_mov_b32 s14, s43
 ; GISEL-NEXT:    s_mov_b32 s15, s42
 ; GISEL-NEXT:    s_swappc_b64 s[30:31], s[16:17]
-; GISEL-NEXT:    v_mov_b32_e32 v1, v0
 ; GISEL-NEXT:    ; implicit-def: $vgpr0
 ; GISEL-NEXT:    ; implicit-def: $vgpr31
 ; GISEL-NEXT:    s_xor_b64 exec, exec, s[48:49]
@@ -1384,7 +1383,6 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_return(i32 %i, ptr %fptr) {
 ; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, s[8:9], v[1:2]
 ; GISEL-NEXT:    s_and_saveexec_b64 s[6:7], vcc
 ; GISEL-NEXT:    s_swappc_b64 s[30:31], s[8:9]
-; GISEL-NEXT:    v_mov_b32_e32 v2, v0
 ; GISEL-NEXT:    ; implicit-def: $vgpr1
 ; GISEL-NEXT:    ; implicit-def: $vgpr0
 ; GISEL-NEXT:    s_xor_b64 exec, exec, s[6:7]
diff --git a/llvm/test/CodeGen/AMDGPU/infloop-subrange-spill-inspect-subrange.mir b/llvm/test/CodeGen/AMDGPU/infloop-subrange-spill-inspect-subrange.mir
index 7864564d289178..285e7e22264a04 100644
--- a/llvm/test/CodeGen/AMDGPU/infloop-subrange-spill-inspect-subrange.mir
+++ b/llvm/test/CodeGen/AMDGPU/infloop-subrange-spill-inspect-subrange.mir
@@ -30,7 +30,7 @@ body:             |
   ; CHECK-NEXT:   dead [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
   ; CHECK-NEXT:   dead undef [[DEF2:%[0-9]+]].sub0:vreg_64 = IMPLICIT_DEF
   ; CHECK-NEXT:   renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX16_IMM renamable $sgpr4_sgpr5, 0, 0 :: (invariant load (s512), align 32, addrspace 4)
-  ; CHECK-NEXT:   renamable $sgpr24 = IMPLICIT_DEF
+  ; CHECK-NEXT:   renamable $sgpr24 = IMPLICIT_DEF implicit-def $sgpr24_sgpr25_sgpr26_sgpr27
   ; CHECK-NEXT:   renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = S_LOAD_DWORDX16_IMM undef renamable $sgpr4_sgpr5, 0, 0 :: (invariant load (s512), align 32, addrspace 4)
   ; CHECK-NEXT:   $exec = S_MOV_B64_term undef renamable $sgpr4_sgpr5
   ; CHECK-NEXT:   S_CBRANCH_EXECZ %bb.6, implicit $exec
@@ -83,7 +83,7 @@ body:             |
   ; CHECK-NEXT:   liveins: $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19:0x000000000000FFFF, $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51:0x00000000FFFFFFFF
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   dead [[IMAGE_SAMPLE_LZ_V1_V2_5:%[0-9]+]]:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2 undef [[DEF]], renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51, undef renamable $sgpr8_sgpr9_sgpr10_sgpr11, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 8)
-  ; CHECK-NEXT:   renamable $sgpr25 = COPY undef renamable $sgpr24
+  ; CHECK-NEXT:   renamable $sgpr25 = COPY undef renamable $sgpr24, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27
   ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.7, implicit undef $vcc
   ; CHECK-NEXT:   S_BRANCH %bb.6
   ; CHECK-NEXT: {{  $}}
diff --git a/llvm/test/CodeGen/AMDGPU/infloop-subrange-spill.mir b/llvm/test/CodeGen/AMDGPU/infloop-subrange-spill.mir
index 1030cdb1b43fc1..995a5d267fbed1 100644
--- a/llvm/test/CodeGen/AMDGPU/infloop-subrange-spill.mir
+++ b/llvm/test/CodeGen/AMDGPU/infloop-subrange-spill.mir
@@ -30,7 +30,7 @@ body:             |
   ; CHECK-NEXT:   dead undef [[DEF3:%[0-9]+]].sub1:vreg_64 = IMPLICIT_DEF
   ; CHECK-NEXT:   dead renamable $sgpr5 = IMPLICIT_DEF
   ; CHECK-NEXT:   renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX16_IMM undef renamable $sgpr4_sgpr5, 0, 0 :: (invariant load (s512), align 32, addrspace 4)
-  ; CHECK-NEXT:   renamable $sgpr24 = IMPLICIT_DEF
+  ; CHECK-NEXT:   renamable $sgpr24 = IMPLICIT_DEF implicit-def $sgpr24_sgpr25_sgpr26_sgpr27
   ; CHECK-NEXT:   renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = S_LOAD_DWORDX16_IMM undef renamable $sgpr4_sgpr5, 0, 0 :: (invariant load (s512), align 32, addrspace 4)
   ; CHECK-NEXT:   $exec = S_MOV_B64_term undef renamable $sgpr4_sgpr5
   ; CHECK-NEXT:   S_CBRANCH_EXECZ %bb.5, implicit $exec
@@ -80,7 +80,7 @@ body:             |
   ; CHECK-NEXT:   liveins: $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19:0x000000000000FFFF, $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51:0x00000000FFFFFFFF
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   dead [[IMAGE_SAMPLE_LZ_V1_V2_5:%[0-9]+]]:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2 undef [[DEF]], renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51, undef renamable $sgpr8_sgpr9_sgpr10_sgpr11, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 8)
-  ; CHECK-NEXT:   renamable $sgpr25 = COPY undef renamable $sgpr24
+  ; CHECK-NEXT:   renamable $sgpr25 = COPY undef renamable $sgpr24, implicit-def $sgpr24_sgpr25_sgpr26_sgpr27
   ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.6, implicit undef $vcc
   ; CHECK-NEXT:   S_BRANCH %bb.5
   ; CHECK-NEXT: {{  $}}
diff --git a/llvm/test/CodeGen/AMDGPU/issue98474-need-live-out-undef-subregister-def.ll b/llvm/test/CodeGen/AMDGPU/issue98474-need-live-out-undef-subregister-def.ll
new file mode 100644
index 00000000000000..7caa563d8b2983
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/issue98474-need-live-out-undef-subregister-def.ll
@@ -0,0 +1,42 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck %s
+
+; Check for verifier error after tail duplication. An implicit_def of
+; a subregsiter is needed to maintain liveness after assignment.
+
+define amdgpu_vs void @test(i32 inreg %cmp, i32 %e0) {
+; CHECK-LABEL: test:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    s_cmp_eq_u32 s0, 0
+; CHECK-NEXT:    s_mov_b32 s0, 0
+; CHECK-NEXT:    s_cbranch_scc1 .LBB0_2
+; CHECK-NEXT:  ; %bb.1: ; %load
+; CHECK-NEXT:    s_mov_b32 s1, s0
+; CHECK-NEXT:    s_mov_b32 s2, s0
+; CHECK-NEXT:    s_mov_b32 s3, s0
+; CHECK-NEXT:    v_mov_b32_e32 v1, 0
+; CHECK-NEXT:    buffer_load_format_xy v[1:2], v1, s[0:3], 0 idxen
+; CHECK-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-NEXT:    exp mrt0 v0, v1, v2, v0
+; CHECK-NEXT:    s_endpgm
+; CHECK-NEXT:  .LBB0_2:
+; CHECK-NEXT:    v_mov_b32_e32 v1, 0
+; CHECK-NEXT:    exp mrt0 v0, v1, v2, v0
+; CHECK-NEXT:    s_endpgm
+entry:
+  %cond = icmp eq i32 %cmp, 0
+  br i1 %cond, label %end, label %load
+
+load:
+  %data1 = call <2 x i32> @llvm.amdgcn.struct.buffer.load.format.v2i32(<4 x i32> zeroinitializer, i32 0, i32 0, i32 0, i32 0)
+  %e1 = extractelement <2 x i32> %data1, i32 0
+  %e2 = extractelement <2 x i32> %data1, i32 1
+  br label %end
+
+end:
+  %out1 = phi i32 [ 0, %entry ], [ %e1, %load ]
+  %out2 = phi i32 [ poison, %entry ], [ %e2, %load ]
+  call void @llvm.amdgcn.exp.i32(i32 0, i32 15, i32 %e0, i32 %out1, i32 %out2, i32 %e0, i1 false, i1 false)
+  ret void
+}
+
diff --git a/llvm/test/CodeGen/AMDGPU/issue98474-virtregrewriter-live-out...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/112679


More information about the llvm-commits mailing list