[llvm] [AMDGPU] Prevent genration of unused SGPR IMPLICIT_DEF assignments (PR #155241)

via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 25 06:55:09 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: Chris Jackson (chrisjbris)

<details>
<summary>Changes</summary>

Dead VGPR->SGPR copies were converted to IMPLICIT_DEF assignments that were unused. Prevent these from being created and update the numerous affected tests. This fixes https://github.com/llvm/llvm-project/issues/144518. But perhaps we don't want to do this, based on the comment in SIInstrinfo.cpp and the commit below - 

https://github.com/chrisjbris/llvm-project/commit/69932e4d692f20615935db0d48f45b21a89cae23 states that this behaviour is desired.

---

Patch is 775.82 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/155241.diff


72 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.cpp (+3-6) 
- (modified) llvm/test/CodeGen/AMDGPU/ashr64_reduce_flags.ll (+2-4) 
- (modified) llvm/test/CodeGen/AMDGPU/buffer-intrinsic-mmo-type.ll (-4) 
- (modified) llvm/test/CodeGen/AMDGPU/change-scc-to-vcc.mir (-3) 
- (modified) llvm/test/CodeGen/AMDGPU/collapse-endcf.ll (-2) 
- (modified) llvm/test/CodeGen/AMDGPU/dag-preserve-disjoint-flag.ll (-4) 
- (modified) llvm/test/CodeGen/AMDGPU/div_i128.ll (-144) 
- (modified) llvm/test/CodeGen/AMDGPU/fix-crash-valu-hazard.ll (-1) 
- (modified) llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16-fake16.mir (-2) 
- (modified) llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16-gfx12-fake16.mir (-5) 
- (modified) llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16-gfx12-true16.mir (-5) 
- (modified) llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16-true16.mir (+7-13) 
- (modified) llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16.mir (-6) 
- (modified) llvm/test/CodeGen/AMDGPU/fix-sgpr-copies.mir (+3-4) 
- (modified) llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.f64.ll (-24) 
- (modified) llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll (-72) 
- (modified) llvm/test/CodeGen/AMDGPU/isel-amdgcn-cs-chain-intrinsic-w32.ll (-4) 
- (modified) llvm/test/CodeGen/AMDGPU/isel-amdgcn-cs-chain-intrinsic-w64.ll (-4) 
- (modified) llvm/test/CodeGen/AMDGPU/isel-amdgpu-cs-chain-cc.ll (-4) 
- (modified) llvm/test/CodeGen/AMDGPU/isel-amdgpu-cs-chain-preserve-cc.ll (+80-176) 
- (modified) llvm/test/CodeGen/AMDGPU/isel-whole-wave-functions.ll (+1-5) 
- (modified) llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.buffer.load.format.f16.ll (-25) 
- (modified) llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.buffer.load.format.ll (-30) 
- (modified) llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.buffer.load.ll (-173) 
- (modified) llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.buffer.store.format.f16.ll (-59) 
- (modified) llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.buffer.store.format.f32.ll (-85) 
- (modified) llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.buffer.store.ll (-176) 
- (modified) llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.buffer.load.format.f16.ll (-45) 
- (modified) llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.buffer.load.format.ll (-54) 
- (modified) llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.buffer.load.ll (-313) 
- (modified) llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.buffer.store.format.f16.ll (-103) 
- (modified) llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.buffer.store.format.f32.ll (-133) 
- (modified) llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.buffer.store.ll (-308) 
- (modified) llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.tbuffer.load.f16.ll (-72) 
- (modified) llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.tbuffer.load.ll (-81) 
- (modified) llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.tbuffer.store.f16.ll (-92) 
- (modified) llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.tbuffer.store.ll (-232) 
- (modified) llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.tbuffer.load.f16.ll (-40) 
- (modified) llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.tbuffer.load.ll (-45) 
- (modified) llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.tbuffer.store.f16.ll (-52) 
- (modified) llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.tbuffer.store.ll (-132) 
- (modified) llvm/test/CodeGen/AMDGPU/legalize-soffset-mbuf.ll (+24-92) 
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.ll (-26) 
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-lastuse.ll (-1) 
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-nontemporal.ll (-24) 
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-volatile.ll (-16) 
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-global-nontemporal.ll (-6) 
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-global-volatile.ll (-6) 
- (modified) llvm/test/CodeGen/AMDGPU/mmra.ll (+25-47) 
- (modified) llvm/test/CodeGen/AMDGPU/move-to-valu-addsubu64.ll (-7) 
- (modified) llvm/test/CodeGen/AMDGPU/move-to-valu-lshlrev.mir (+4-6) 
- (modified) llvm/test/CodeGen/AMDGPU/move-to-valu-pseudo-scalar-trans-f16-fake16.ll (-10) 
- (modified) llvm/test/CodeGen/AMDGPU/move-to-valu-pseudo-scalar-trans.ll (-5) 
- (modified) llvm/test/CodeGen/AMDGPU/move-to-valu-vimage-vsample.ll (-40) 
- (modified) llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands-non-ptr-intrinsics.ll (-33) 
- (modified) llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll (-56) 
- (modified) llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.mir (-44) 
- (modified) llvm/test/CodeGen/AMDGPU/phi-vgpr-input-moveimm.mir (-2) 
- (modified) llvm/test/CodeGen/AMDGPU/rem_i128.ll (-180) 
- (modified) llvm/test/CodeGen/AMDGPU/s_add_co_pseudo_lowering.mir (-2) 
- (modified) llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll (-6) 
- (modified) llvm/test/CodeGen/AMDGPU/sgpr-to-vreg1-copy.mir (+3-1) 
- (modified) llvm/test/CodeGen/AMDGPU/shl64_reduce_flags.ll (+6-12) 
- (modified) llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies-copy-to-sgpr.mir (-1) 
- (modified) llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir (+2-2) 
- (modified) llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr-update-regscavenger.ll (-1) 
- (modified) llvm/test/CodeGen/AMDGPU/srl64_reduce_flags.ll (+2-4) 
- (modified) llvm/test/CodeGen/AMDGPU/trap-abis.ll (-1) 
- (modified) llvm/test/CodeGen/AMDGPU/vgpr-large-tuple-alloc-error.ll (+5-291) 
- (modified) llvm/test/CodeGen/AMDGPU/vgpr-spill-placement-issue61083.ll (-1) 
- (modified) llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll (+76-141) 
- (modified) llvm/test/CodeGen/AMDGPU/wwm-reserved.ll (-40) 


``````````diff
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index ba37bdb203a7f..38a1dad1090f8 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -8063,12 +8063,9 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
       MRI.replaceRegWith(DstReg, NewDstReg);
       MRI.clearKillFlags(NewDstReg);
       Inst.getOperand(0).setReg(DstReg);
-      // Make sure we don't leave around a dead VGPR->SGPR copy. Normally
-      // these are deleted later, but at -O0 it would leave a suspicious
-      // looking illegal copy of an undef register.
-      for (unsigned I = Inst.getNumOperands() - 1; I != 0; --I)
-        Inst.removeOperand(I);
-      Inst.setDesc(get(AMDGPU::IMPLICIT_DEF));
+      // Completely remove dead VGPR->SGPR copies and ensure that dead
+      // assignments to IMPLICIT_DEF are not created.
+      Inst.eraseFromParent();
       // Legalize t16 operand since replaceReg is called after addUsersToVALU
       for (MachineOperand &MO :
            make_early_inc_range(MRI.use_operands(NewDstReg))) {
diff --git a/llvm/test/CodeGen/AMDGPU/ashr64_reduce_flags.ll b/llvm/test/CodeGen/AMDGPU/ashr64_reduce_flags.ll
index 59f3a4915b9a7..70db5fa12e997 100644
--- a/llvm/test/CodeGen/AMDGPU/ashr64_reduce_flags.ll
+++ b/llvm/test/CodeGen/AMDGPU/ashr64_reduce_flags.ll
@@ -20,14 +20,12 @@ define i64 @ashr_exact(i64 %arg0, i64 %shift_amt) {
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
   ; CHECK-NEXT:   [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
   ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
-  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
   ; CHECK-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
   ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
   ; CHECK-NEXT:   [[DEF3:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
-  ; CHECK-NEXT:   [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
-  ; CHECK-NEXT:   [[DEF5:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[DEF3]]
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[DEF2]]
   ; CHECK-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, killed [[COPY4]], %subreg.sub1
   ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
   ; CHECK-NEXT:   [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = exact V_ASHRREV_I32_e64 killed [[COPY5]], [[COPY3]], implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/buffer-intrinsic-mmo-type.ll b/llvm/test/CodeGen/AMDGPU/buffer-intrinsic-mmo-type.ll
index efaee6feebaaf..146010c7146ef 100644
--- a/llvm/test/CodeGen/AMDGPU/buffer-intrinsic-mmo-type.ll
+++ b/llvm/test/CodeGen/AMDGPU/buffer-intrinsic-mmo-type.ll
@@ -48,10 +48,6 @@ define amdgpu_ps void @buffer_store_v8f16(ptr addrspace(8) inreg %rsrc, <8 x hal
   ; GCN-NEXT:   [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
   ; GCN-NEXT:   [[COPY12:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
   ; GCN-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY12]], %subreg.sub0, killed [[COPY11]], %subreg.sub1, killed [[COPY10]], %subreg.sub2, killed [[COPY9]], %subreg.sub3
-  ; GCN-NEXT:   [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
-  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
-  ; GCN-NEXT:   [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
-  ; GCN-NEXT:   [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
   ; GCN-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY1]], %subreg.sub3
   ; GCN-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
   ; GCN-NEXT:   [[COPY13:%[0-9]+]]:vreg_128 = COPY [[REG_SEQUENCE3]]
diff --git a/llvm/test/CodeGen/AMDGPU/change-scc-to-vcc.mir b/llvm/test/CodeGen/AMDGPU/change-scc-to-vcc.mir
index 4ff3b5ab566e4..4ad8dde41d37d 100644
--- a/llvm/test/CodeGen/AMDGPU/change-scc-to-vcc.mir
+++ b/llvm/test/CodeGen/AMDGPU/change-scc-to-vcc.mir
@@ -21,7 +21,6 @@ body:               |
     ; GCN-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 killed [[DEF]], [[COPY]], implicit-def $vcc_lo, implicit $exec
     ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[DEF3]]
     ; GCN-NEXT: [[V_ADDC_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADDC_U32_e32 killed [[DEF2]], [[COPY1]], implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec
-    ; GCN-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
     ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_2]]
     ; GCN-NEXT: [[V_ADDC_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADDC_U32_e32 [[V_MUL_HI_U32_U24_e64_]], [[COPY2]], implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec
     %0:sreg_32 = S_MOV_B32 681
@@ -52,11 +51,9 @@ body:               |
     ; GCN-NEXT: [[DEF3:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
     ; GCN-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
     ; GCN-NEXT: [[DEF5:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
-    ; GCN-NEXT: [[DEF6:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
     ; GCN-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 killed [[DEF2]], [[DEF]], implicit-def $vcc_lo, implicit $exec
     ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY killed [[DEF4]]
     ; GCN-NEXT: [[V_ADDC_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADDC_U32_e32 killed [[DEF3]], [[COPY]], implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec
-    ; GCN-NEXT: [[DEF7:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
     ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[DEF5]]
     ; GCN-NEXT: [[V_ADDC_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADDC_U32_e32 killed [[V_ADDC_U32_e32_1]], [[COPY1]], implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec
     %0:vgpr_32 = IMPLICIT_DEF
diff --git a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
index c30ce8c8ed507..2b63a8cf69476 100644
--- a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
+++ b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
@@ -437,7 +437,6 @@ define amdgpu_kernel void @nested_if_if_else(ptr addrspace(1) nocapture %arg) {
 ; GCN-O0-NEXT:    s_mov_b32 s4, 2
 ; GCN-O0-NEXT:    v_lshlrev_b32_e64 v2, s4, v0
 ; GCN-O0-NEXT:    s_mov_b32 s4, 0
-; GCN-O0-NEXT:    ; implicit-def: $sgpr4
 ; GCN-O0-NEXT:    s_waitcnt expcnt(0)
 ; GCN-O0-NEXT:    v_mov_b32_e32 v1, 0
 ; GCN-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
@@ -668,7 +667,6 @@ define amdgpu_kernel void @nested_if_else_if(ptr addrspace(1) nocapture %arg) {
 ; GCN-O0-NEXT:    s_mov_b32 s0, 2
 ; GCN-O0-NEXT:    v_lshlrev_b32_e64 v2, s0, v0
 ; GCN-O0-NEXT:    s_mov_b32 s1, 0
-; GCN-O0-NEXT:    ; implicit-def: $sgpr1
 ; GCN-O0-NEXT:    s_waitcnt expcnt(0)
 ; GCN-O0-NEXT:    v_mov_b32_e32 v1, 0
 ; GCN-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
diff --git a/llvm/test/CodeGen/AMDGPU/dag-preserve-disjoint-flag.ll b/llvm/test/CodeGen/AMDGPU/dag-preserve-disjoint-flag.ll
index d63a36c4b2958..0c147b547db37 100644
--- a/llvm/test/CodeGen/AMDGPU/dag-preserve-disjoint-flag.ll
+++ b/llvm/test/CodeGen/AMDGPU/dag-preserve-disjoint-flag.ll
@@ -107,11 +107,7 @@ define i64 @v_or_i64_disjoint(i64 %a, i64 %b) {
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
   ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
-  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
   ; CHECK-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
-  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
-  ; CHECK-NEXT:   [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
   ; CHECK-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1
   ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
   ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
diff --git a/llvm/test/CodeGen/AMDGPU/div_i128.ll b/llvm/test/CodeGen/AMDGPU/div_i128.ll
index 747affa928601..49ba0e2ac796a 100644
--- a/llvm/test/CodeGen/AMDGPU/div_i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/div_i128.ll
@@ -228,27 +228,15 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v2
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v0
 ; GFX9-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload
-; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
-; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
 ; GFX9-O0-NEXT:    ; kill: def $vgpr20 killed $vgpr20 def $vgpr20_vgpr21 killed $exec
 ; GFX9-O0-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v21, v0
-; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
-; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
 ; GFX9-O0-NEXT:    ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v5
-; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
-; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
 ; GFX9-O0-NEXT:    ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v3
-; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
-; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
 ; GFX9-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v1
-; GFX9-O0-NEXT:    ; implicit-def: $sgpr4_sgpr5
-; GFX9-O0-NEXT:    ; implicit-def: $sgpr4_sgpr5
-; GFX9-O0-NEXT:    ; implicit-def: $sgpr4_sgpr5
-; GFX9-O0-NEXT:    ; implicit-def: $sgpr4_sgpr5
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v2
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v3
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v9
@@ -268,8 +256,6 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    v_subb_co_u32_e32 v4, vcc, v0, v13, vcc
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v0, s11
 ; GFX9-O0-NEXT:    v_subb_co_u32_e32 v0, vcc, v0, v19, vcc
-; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
-; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
 ; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v3
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v6
@@ -278,20 +264,14 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    v_cndmask_b32_e64 v3, v2, v3, s[4:5]
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v5
 ; GFX9-O0-NEXT:    v_cndmask_b32_e64 v2, v1, v2, s[4:5]
-; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
-; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v2
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v16, v3
-; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
-; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
 ; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v0
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v5
 ; GFX9-O0-NEXT:    v_cndmask_b32_e64 v1, v19, v0, s[4:5]
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v4
 ; GFX9-O0-NEXT:    v_cndmask_b32_e64 v0, v13, v0, s[4:5]
-; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
-; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v4, v0
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v1
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v7
@@ -305,8 +285,6 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    v_subb_co_u32_e32 v10, vcc, v7, v12, vcc
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v7, s11
 ; GFX9-O0-NEXT:    v_subb_co_u32_e32 v7, vcc, v7, v14, vcc
-; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
-; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
 ; GFX9-O0-NEXT:    ; kill: def $vgpr17 killed $vgpr17 def $vgpr17_vgpr18 killed $exec
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v18, v9
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v18
@@ -315,20 +293,14 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    v_cndmask_b32_e64 v9, v8, v9, s[4:5]
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v17
 ; GFX9-O0-NEXT:    v_cndmask_b32_e64 v6, v6, v8, s[4:5]
-; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
-; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v6
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v18, v9
-; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
-; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
 ; GFX9-O0-NEXT:    ; kill: def $vgpr10 killed $vgpr10 def $vgpr10_vgpr11 killed $exec
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v7
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v11
 ; GFX9-O0-NEXT:    v_cndmask_b32_e64 v8, v14, v7, s[4:5]
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v10
 ; GFX9-O0-NEXT:    v_cndmask_b32_e64 v7, v12, v7, s[4:5]
-; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
-; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v10, v7
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v8
 ; GFX9-O0-NEXT:    v_xor_b32_e64 v14, v14, v19
@@ -390,7 +362,6 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    v_ffbh_u32_e64 v8, v8
 ; GFX9-O0-NEXT:    v_min_u32_e64 v7, v7, v8
 ; GFX9-O0-NEXT:    s_mov_b32 s12, 0
-; GFX9-O0-NEXT:    ; implicit-def: $sgpr14
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v10, s12
 ; GFX9-O0-NEXT:    ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v10
@@ -399,7 +370,6 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    v_add_u32_e64 v6, v6, s13
 ; GFX9-O0-NEXT:    v_ffbh_u32_e64 v9, v9
 ; GFX9-O0-NEXT:    v_min_u32_e64 v12, v6, v9
-; GFX9-O0-NEXT:    ; implicit-def: $sgpr14
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v6, s12
 ; GFX9-O0-NEXT:    ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v6
@@ -418,8 +388,6 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v7
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v9
 ; GFX9-O0-NEXT:    v_cndmask_b32_e64 v8, v7, v8, s[8:9]
-; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
-; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
 ; GFX9-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v6
 ; GFX9-O0-NEXT:    s_mov_b64 s[8:9], s[6:7]
@@ -428,7 +396,6 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    v_add_u32_e64 v4, v4, s13
 ; GFX9-O0-NEXT:    v_ffbh_u32_e64 v5, v1
 ; GFX9-O0-NEXT:    v_min_u32_e64 v5, v4, v5
-; GFX9-O0-NEXT:    ; implicit-def: $sgpr16
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s12
 ; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v4
@@ -437,7 +404,6 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    v_add_u32_e64 v4, v4, s13
 ; GFX9-O0-NEXT:    v_ffbh_u32_e64 v10, v3
 ; GFX9-O0-NEXT:    v_min_u32_e64 v11, v4, v10
-; GFX9-O0-NEXT:    ; implicit-def: $sgpr13
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s12
 ; GFX9-O0-NEXT:    ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v4
@@ -455,8 +421,6 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v5
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v10
 ; GFX9-O0-NEXT:    v_cndmask_b32_e64 v5, v5, v6, s[8:9]
-; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
-; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
 ; GFX9-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v6, v4
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v5
@@ -471,15 +435,11 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v6, s11
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v5, s11
 ; GFX9-O0-NEXT:    v_subb_co_u32_e32 v6, vcc, v5, v6, vcc
-; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
-; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
 ; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v8
 ; GFX9-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT:    s_nop 0
 ; GFX9-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
-; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
-; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
 ; GFX9-O0-NEXT:    ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v6
 ; GFX9-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
@@ -520,8 +480,6 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    s_mov_b64 s[12:13], s[8:9]
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v1, s10
 ; GFX9-O0-NEXT:    v_cndmask_b32_e64 v0, v0, v1, s[12:13]
-; GFX9-O0-NEXT:    ; implicit-def: $sgpr12
-; GFX9-O0-NEXT:    ; implicit-def: $sgpr12
 ; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v4
 ; GFX9-O0-NEXT:    s_mov_b64 s[12:13], s[8:9]
@@ -529,8 +487,6 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    v_cndmask_b32_e64 v4, v3, v4, s[12:13]
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v3, s10
 ; GFX9-O0-NEXT:    v_cndmask_b32_e64 v2, v2, v3, s[8:9]
-; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
-; GFX9-O0-NEXT:    ; implicit-def: $sgpr8
 ; GFX9-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v4
 ; GFX9-O0-NEXT:    s_and_b64 s[6:7], s[4:5], s[6:7]
@@ -765,8 +721,6 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    v_subb_co_u32_e32 v12, vcc, v12, v10, vcc
 ; GFX9-O0-NEXT:    v_subb_co_u32_e32 v11, vcc, v11, v4, vcc
 ; GFX9-O0-NEXT:    v_subb_co_u32_e32 v7, vcc, v7, v5, vcc
-; GFX9-O0-NEXT:    ; implicit-def: $sgpr5
-; GFX9-O0-NEXT:    ; implicit-def: $sgpr5
 ; GFX9-O0-NEXT:    ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v7
 ; GFX9-O0-NEXT:    v_ashrrev_i64 v[13:14], s4, v[11:12]
@@ -799,12 +753,8 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    v_subb_co_u32_e32 v10, vcc, v10, v18, vcc
 ; GFX9-O0-NEXT:    v_subb_co_u32_e32 v4, vcc, v4, v11, vcc
 ; GFX9-O0-NEXT:    v_subb_co_u32_e32 v7, vcc, v5, v7, vcc
-; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
-; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
 ; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v7
-; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
-; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
 ; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v10
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v11, v8
@@ -822,12 +772,8 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    v_addc_co_u32_e32 v16, vcc, v10, v11, vcc
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v10, s4
 ; GFX9-O0-NEXT:    v_addc_co_u32_e32 v8, vcc, v8, v10, vcc
-; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
-; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
 ; GFX9-O0-NEXT:    ; kill: def $vgpr19 killed $vgpr19 def $vgpr19_vgpr20 killed $exec
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v20, v9
-; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
-; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
 ; GFX9-O0-NEXT:    ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v8
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v8, v16
@@ -950,8 +896,6 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    v_cndmask_b32_e64 v6, v6, v7, s[6:7]
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v18
 ; GFX9-O0-NEXT:    v_cndmask_b32_e64 v6, v6, v7, s[4:5]
-; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
-; GFX9-O0-NEXT:    ; implicit-def: $sgpr4
 ; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v5
 ; GFX9-O0-NEXT:    v_lshrrev_b64 v[4:5], v4, v[14:15]
@@ -964,8 +908,6 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    s_mov_b32 s8, s4
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v4, s8
 ; GFX9-O0-NEXT:    v_cndmask_b32_e64 v4, v4, v5, s[6:7]
-; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
-; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
 ; GFX9-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v5, v14
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v12
@@ -983,12 +925,8 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    v_addc_co_u32_e32 v18, vcc, v14, v15, vcc
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v14, s6
 ; GFX9-O0-NEXT:    v_addc_co_u32_e32 v13, vcc, v13, v14, vcc
-; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
-; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
 ; GFX9-O0-NEXT:    ; k...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/155241


More information about the llvm-commits mailing list