[llvm] [AMDGPU] Prevent genration of unused SGPR IMPLICIT_DEF assignments (PR #155241)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 25 06:55:09 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Chris Jackson (chrisjbris)
<details>
<summary>Changes</summary>
Dead VGPR->SGPR copies were converted to IMPLICIT_DEF assignments that were unused. Prevent these from being created and update the numerous affected tests. This fixes https://github.com/llvm/llvm-project/issues/144518. But perhaps we don't want to do this, based on the comment in SIInstrinfo.cpp and the commit below -
https://github.com/chrisjbris/llvm-project/commit/69932e4d692f20615935db0d48f45b21a89cae23 states that this behaviour is desired.
---
Patch is 775.82 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/155241.diff
72 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.cpp (+3-6)
- (modified) llvm/test/CodeGen/AMDGPU/ashr64_reduce_flags.ll (+2-4)
- (modified) llvm/test/CodeGen/AMDGPU/buffer-intrinsic-mmo-type.ll (-4)
- (modified) llvm/test/CodeGen/AMDGPU/change-scc-to-vcc.mir (-3)
- (modified) llvm/test/CodeGen/AMDGPU/collapse-endcf.ll (-2)
- (modified) llvm/test/CodeGen/AMDGPU/dag-preserve-disjoint-flag.ll (-4)
- (modified) llvm/test/CodeGen/AMDGPU/div_i128.ll (-144)
- (modified) llvm/test/CodeGen/AMDGPU/fix-crash-valu-hazard.ll (-1)
- (modified) llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16-fake16.mir (-2)
- (modified) llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16-gfx12-fake16.mir (-5)
- (modified) llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16-gfx12-true16.mir (-5)
- (modified) llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16-true16.mir (+7-13)
- (modified) llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16.mir (-6)
- (modified) llvm/test/CodeGen/AMDGPU/fix-sgpr-copies.mir (+3-4)
- (modified) llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.f64.ll (-24)
- (modified) llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll (-72)
- (modified) llvm/test/CodeGen/AMDGPU/isel-amdgcn-cs-chain-intrinsic-w32.ll (-4)
- (modified) llvm/test/CodeGen/AMDGPU/isel-amdgcn-cs-chain-intrinsic-w64.ll (-4)
- (modified) llvm/test/CodeGen/AMDGPU/isel-amdgpu-cs-chain-cc.ll (-4)
- (modified) llvm/test/CodeGen/AMDGPU/isel-amdgpu-cs-chain-preserve-cc.ll (+80-176)
- (modified) llvm/test/CodeGen/AMDGPU/isel-whole-wave-functions.ll (+1-5)
- (modified) llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.buffer.load.format.f16.ll (-25)
- (modified) llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.buffer.load.format.ll (-30)
- (modified) llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.buffer.load.ll (-173)
- (modified) llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.buffer.store.format.f16.ll (-59)
- (modified) llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.buffer.store.format.f32.ll (-85)
- (modified) llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.buffer.store.ll (-176)
- (modified) llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.buffer.load.format.f16.ll (-45)
- (modified) llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.buffer.load.format.ll (-54)
- (modified) llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.buffer.load.ll (-313)
- (modified) llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.buffer.store.format.f16.ll (-103)
- (modified) llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.buffer.store.format.f32.ll (-133)
- (modified) llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.buffer.store.ll (-308)
- (modified) llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.tbuffer.load.f16.ll (-72)
- (modified) llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.tbuffer.load.ll (-81)
- (modified) llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.tbuffer.store.f16.ll (-92)
- (modified) llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.tbuffer.store.ll (-232)
- (modified) llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.tbuffer.load.f16.ll (-40)
- (modified) llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.tbuffer.load.ll (-45)
- (modified) llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.tbuffer.store.f16.ll (-52)
- (modified) llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.tbuffer.store.ll (-132)
- (modified) llvm/test/CodeGen/AMDGPU/legalize-soffset-mbuf.ll (+24-92)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.ll (-26)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-lastuse.ll (-1)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-nontemporal.ll (-24)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-volatile.ll (-16)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-global-nontemporal.ll (-6)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-global-volatile.ll (-6)
- (modified) llvm/test/CodeGen/AMDGPU/mmra.ll (+25-47)
- (modified) llvm/test/CodeGen/AMDGPU/move-to-valu-addsubu64.ll (-7)
- (modified) llvm/test/CodeGen/AMDGPU/move-to-valu-lshlrev.mir (+4-6)
- (modified) llvm/test/CodeGen/AMDGPU/move-to-valu-pseudo-scalar-trans-f16-fake16.ll (-10)
- (modified) llvm/test/CodeGen/AMDGPU/move-to-valu-pseudo-scalar-trans.ll (-5)
- (modified) llvm/test/CodeGen/AMDGPU/move-to-valu-vimage-vsample.ll (-40)
- (modified) llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands-non-ptr-intrinsics.ll (-33)
- (modified) llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll (-56)
- (modified) llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.mir (-44)
- (modified) llvm/test/CodeGen/AMDGPU/phi-vgpr-input-moveimm.mir (-2)
- (modified) llvm/test/CodeGen/AMDGPU/rem_i128.ll (-180)
- (modified) llvm/test/CodeGen/AMDGPU/s_add_co_pseudo_lowering.mir (-2)
- (modified) llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll (-6)
- (modified) llvm/test/CodeGen/AMDGPU/sgpr-to-vreg1-copy.mir (+3-1)
- (modified) llvm/test/CodeGen/AMDGPU/shl64_reduce_flags.ll (+6-12)
- (modified) llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies-copy-to-sgpr.mir (-1)
- (modified) llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir (+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr-update-regscavenger.ll (-1)
- (modified) llvm/test/CodeGen/AMDGPU/srl64_reduce_flags.ll (+2-4)
- (modified) llvm/test/CodeGen/AMDGPU/trap-abis.ll (-1)
- (modified) llvm/test/CodeGen/AMDGPU/vgpr-large-tuple-alloc-error.ll (+5-291)
- (modified) llvm/test/CodeGen/AMDGPU/vgpr-spill-placement-issue61083.ll (-1)
- (modified) llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll (+76-141)
- (modified) llvm/test/CodeGen/AMDGPU/wwm-reserved.ll (-40)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index ba37bdb203a7f..38a1dad1090f8 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -8063,12 +8063,9 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
MRI.replaceRegWith(DstReg, NewDstReg);
MRI.clearKillFlags(NewDstReg);
Inst.getOperand(0).setReg(DstReg);
- // Make sure we don't leave around a dead VGPR->SGPR copy. Normally
- // these are deleted later, but at -O0 it would leave a suspicious
- // looking illegal copy of an undef register.
- for (unsigned I = Inst.getNumOperands() - 1; I != 0; --I)
- Inst.removeOperand(I);
- Inst.setDesc(get(AMDGPU::IMPLICIT_DEF));
+ // Completely remove dead VGPR->SGPR copies and ensure that dead
+ // assignments to IMPLICIT_DEF are not created.
+ Inst.eraseFromParent();
// Legalize t16 operand since replaceReg is called after addUsersToVALU
for (MachineOperand &MO :
make_early_inc_range(MRI.use_operands(NewDstReg))) {
diff --git a/llvm/test/CodeGen/AMDGPU/ashr64_reduce_flags.ll b/llvm/test/CodeGen/AMDGPU/ashr64_reduce_flags.ll
index 59f3a4915b9a7..70db5fa12e997 100644
--- a/llvm/test/CodeGen/AMDGPU/ashr64_reduce_flags.ll
+++ b/llvm/test/CodeGen/AMDGPU/ashr64_reduce_flags.ll
@@ -20,14 +20,12 @@ define i64 @ashr_exact(i64 %arg0, i64 %shift_amt) {
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF5:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[DEF3]]
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[DEF2]]
; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, killed [[COPY4]], %subreg.sub1
; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
; CHECK-NEXT: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = exact V_ASHRREV_I32_e64 killed [[COPY5]], [[COPY3]], implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/buffer-intrinsic-mmo-type.ll b/llvm/test/CodeGen/AMDGPU/buffer-intrinsic-mmo-type.ll
index efaee6feebaaf..146010c7146ef 100644
--- a/llvm/test/CodeGen/AMDGPU/buffer-intrinsic-mmo-type.ll
+++ b/llvm/test/CodeGen/AMDGPU/buffer-intrinsic-mmo-type.ll
@@ -48,10 +48,6 @@ define amdgpu_ps void @buffer_store_v8f16(ptr addrspace(8) inreg %rsrc, <8 x hal
; GCN-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
; GCN-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY12]], %subreg.sub0, killed [[COPY11]], %subreg.sub1, killed [[COPY10]], %subreg.sub2, killed [[COPY9]], %subreg.sub3
- ; GCN-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
- ; GCN-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
- ; GCN-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
- ; GCN-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
; GCN-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY1]], %subreg.sub3
; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GCN-NEXT: [[COPY13:%[0-9]+]]:vreg_128 = COPY [[REG_SEQUENCE3]]
diff --git a/llvm/test/CodeGen/AMDGPU/change-scc-to-vcc.mir b/llvm/test/CodeGen/AMDGPU/change-scc-to-vcc.mir
index 4ff3b5ab566e4..4ad8dde41d37d 100644
--- a/llvm/test/CodeGen/AMDGPU/change-scc-to-vcc.mir
+++ b/llvm/test/CodeGen/AMDGPU/change-scc-to-vcc.mir
@@ -21,7 +21,6 @@ body: |
; GCN-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 killed [[DEF]], [[COPY]], implicit-def $vcc_lo, implicit $exec
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[DEF3]]
; GCN-NEXT: [[V_ADDC_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADDC_U32_e32 killed [[DEF2]], [[COPY1]], implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec
- ; GCN-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_2]]
; GCN-NEXT: [[V_ADDC_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADDC_U32_e32 [[V_MUL_HI_U32_U24_e64_]], [[COPY2]], implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec
%0:sreg_32 = S_MOV_B32 681
@@ -52,11 +51,9 @@ body: |
; GCN-NEXT: [[DEF3:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
; GCN-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
; GCN-NEXT: [[DEF5:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
- ; GCN-NEXT: [[DEF6:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
; GCN-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 killed [[DEF2]], [[DEF]], implicit-def $vcc_lo, implicit $exec
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY killed [[DEF4]]
; GCN-NEXT: [[V_ADDC_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADDC_U32_e32 killed [[DEF3]], [[COPY]], implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec
- ; GCN-NEXT: [[DEF7:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[DEF5]]
; GCN-NEXT: [[V_ADDC_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADDC_U32_e32 killed [[V_ADDC_U32_e32_1]], [[COPY1]], implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec
%0:vgpr_32 = IMPLICIT_DEF
diff --git a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
index c30ce8c8ed507..2b63a8cf69476 100644
--- a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
+++ b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
@@ -437,7 +437,6 @@ define amdgpu_kernel void @nested_if_if_else(ptr addrspace(1) nocapture %arg) {
; GCN-O0-NEXT: s_mov_b32 s4, 2
; GCN-O0-NEXT: v_lshlrev_b32_e64 v2, s4, v0
; GCN-O0-NEXT: s_mov_b32 s4, 0
-; GCN-O0-NEXT: ; implicit-def: $sgpr4
; GCN-O0-NEXT: s_waitcnt expcnt(0)
; GCN-O0-NEXT: v_mov_b32_e32 v1, 0
; GCN-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
@@ -668,7 +667,6 @@ define amdgpu_kernel void @nested_if_else_if(ptr addrspace(1) nocapture %arg) {
; GCN-O0-NEXT: s_mov_b32 s0, 2
; GCN-O0-NEXT: v_lshlrev_b32_e64 v2, s0, v0
; GCN-O0-NEXT: s_mov_b32 s1, 0
-; GCN-O0-NEXT: ; implicit-def: $sgpr1
; GCN-O0-NEXT: s_waitcnt expcnt(0)
; GCN-O0-NEXT: v_mov_b32_e32 v1, 0
; GCN-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
diff --git a/llvm/test/CodeGen/AMDGPU/dag-preserve-disjoint-flag.ll b/llvm/test/CodeGen/AMDGPU/dag-preserve-disjoint-flag.ll
index d63a36c4b2958..0c147b547db37 100644
--- a/llvm/test/CodeGen/AMDGPU/dag-preserve-disjoint-flag.ll
+++ b/llvm/test/CodeGen/AMDGPU/dag-preserve-disjoint-flag.ll
@@ -107,11 +107,7 @@ define i64 @v_or_i64_disjoint(i64 %a, i64 %b) {
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
- ; CHECK-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
diff --git a/llvm/test/CodeGen/AMDGPU/div_i128.ll b/llvm/test/CodeGen/AMDGPU/div_i128.ll
index 747affa928601..49ba0e2ac796a 100644
--- a/llvm/test/CodeGen/AMDGPU/div_i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/div_i128.ll
@@ -228,27 +228,15 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: v_mov_b32_e32 v9, v2
; GFX9-O0-NEXT: v_mov_b32_e32 v2, v0
; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: ; implicit-def: $sgpr4
-; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; kill: def $vgpr20 killed $vgpr20 def $vgpr20_vgpr21 killed $exec
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
; GFX9-O0-NEXT: v_mov_b32_e32 v21, v0
-; GFX9-O0-NEXT: ; implicit-def: $sgpr4
-; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v8, v5
-; GFX9-O0-NEXT: ; implicit-def: $sgpr4
-; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v10, v3
-; GFX9-O0-NEXT: ; implicit-def: $sgpr4
-; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v1
-; GFX9-O0-NEXT: ; implicit-def: $sgpr4_sgpr5
-; GFX9-O0-NEXT: ; implicit-def: $sgpr4_sgpr5
-; GFX9-O0-NEXT: ; implicit-def: $sgpr4_sgpr5
-; GFX9-O0-NEXT: ; implicit-def: $sgpr4_sgpr5
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v2
; GFX9-O0-NEXT: v_mov_b32_e32 v2, v3
; GFX9-O0-NEXT: v_mov_b32_e32 v13, v9
@@ -268,8 +256,6 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: v_subb_co_u32_e32 v4, vcc, v0, v13, vcc
; GFX9-O0-NEXT: v_mov_b32_e32 v0, s11
; GFX9-O0-NEXT: v_subb_co_u32_e32 v0, vcc, v0, v19, vcc
-; GFX9-O0-NEXT: ; implicit-def: $sgpr4
-; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v3
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v6
@@ -278,20 +264,14 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, v2, v3, s[4:5]
; GFX9-O0-NEXT: v_mov_b32_e32 v2, v5
; GFX9-O0-NEXT: v_cndmask_b32_e64 v2, v1, v2, s[4:5]
-; GFX9-O0-NEXT: ; implicit-def: $sgpr8
-; GFX9-O0-NEXT: ; implicit-def: $sgpr8
; GFX9-O0-NEXT: v_mov_b32_e32 v15, v2
; GFX9-O0-NEXT: v_mov_b32_e32 v16, v3
-; GFX9-O0-NEXT: ; implicit-def: $sgpr8
-; GFX9-O0-NEXT: ; implicit-def: $sgpr8
; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v0
; GFX9-O0-NEXT: v_mov_b32_e32 v0, v5
; GFX9-O0-NEXT: v_cndmask_b32_e64 v1, v19, v0, s[4:5]
; GFX9-O0-NEXT: v_mov_b32_e32 v0, v4
; GFX9-O0-NEXT: v_cndmask_b32_e64 v0, v13, v0, s[4:5]
-; GFX9-O0-NEXT: ; implicit-def: $sgpr4
-; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: v_mov_b32_e32 v4, v0
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v1
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v7
@@ -305,8 +285,6 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: v_subb_co_u32_e32 v10, vcc, v7, v12, vcc
; GFX9-O0-NEXT: v_mov_b32_e32 v7, s11
; GFX9-O0-NEXT: v_subb_co_u32_e32 v7, vcc, v7, v14, vcc
-; GFX9-O0-NEXT: ; implicit-def: $sgpr4
-; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; kill: def $vgpr17 killed $vgpr17 def $vgpr17_vgpr18 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v18, v9
; GFX9-O0-NEXT: v_mov_b32_e32 v9, v18
@@ -315,20 +293,14 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: v_cndmask_b32_e64 v9, v8, v9, s[4:5]
; GFX9-O0-NEXT: v_mov_b32_e32 v8, v17
; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v8, s[4:5]
-; GFX9-O0-NEXT: ; implicit-def: $sgpr8
-; GFX9-O0-NEXT: ; implicit-def: $sgpr8
; GFX9-O0-NEXT: v_mov_b32_e32 v17, v6
; GFX9-O0-NEXT: v_mov_b32_e32 v18, v9
-; GFX9-O0-NEXT: ; implicit-def: $sgpr8
-; GFX9-O0-NEXT: ; implicit-def: $sgpr8
; GFX9-O0-NEXT: ; kill: def $vgpr10 killed $vgpr10 def $vgpr10_vgpr11 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v11, v7
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v11
; GFX9-O0-NEXT: v_cndmask_b32_e64 v8, v14, v7, s[4:5]
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v10
; GFX9-O0-NEXT: v_cndmask_b32_e64 v7, v12, v7, s[4:5]
-; GFX9-O0-NEXT: ; implicit-def: $sgpr4
-; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: v_mov_b32_e32 v10, v7
; GFX9-O0-NEXT: v_mov_b32_e32 v11, v8
; GFX9-O0-NEXT: v_xor_b32_e64 v14, v14, v19
@@ -390,7 +362,6 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: v_ffbh_u32_e64 v8, v8
; GFX9-O0-NEXT: v_min_u32_e64 v7, v7, v8
; GFX9-O0-NEXT: s_mov_b32 s12, 0
-; GFX9-O0-NEXT: ; implicit-def: $sgpr14
; GFX9-O0-NEXT: v_mov_b32_e32 v10, s12
; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v8, v10
@@ -399,7 +370,6 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: v_add_u32_e64 v6, v6, s13
; GFX9-O0-NEXT: v_ffbh_u32_e64 v9, v9
; GFX9-O0-NEXT: v_min_u32_e64 v12, v6, v9
-; GFX9-O0-NEXT: ; implicit-def: $sgpr14
; GFX9-O0-NEXT: v_mov_b32_e32 v6, s12
; GFX9-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v13, v6
@@ -418,8 +388,6 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: v_mov_b32_e32 v8, v7
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v9
; GFX9-O0-NEXT: v_cndmask_b32_e64 v8, v7, v8, s[8:9]
-; GFX9-O0-NEXT: ; implicit-def: $sgpr8
-; GFX9-O0-NEXT: ; implicit-def: $sgpr8
; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v9, v6
; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7]
@@ -428,7 +396,6 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: v_add_u32_e64 v4, v4, s13
; GFX9-O0-NEXT: v_ffbh_u32_e64 v5, v1
; GFX9-O0-NEXT: v_min_u32_e64 v5, v4, v5
-; GFX9-O0-NEXT: ; implicit-def: $sgpr16
; GFX9-O0-NEXT: v_mov_b32_e32 v4, s12
; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v4
@@ -437,7 +404,6 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: v_add_u32_e64 v4, v4, s13
; GFX9-O0-NEXT: v_ffbh_u32_e64 v10, v3
; GFX9-O0-NEXT: v_min_u32_e64 v11, v4, v10
-; GFX9-O0-NEXT: ; implicit-def: $sgpr13
; GFX9-O0-NEXT: v_mov_b32_e32 v4, s12
; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v12, v4
@@ -455,8 +421,6 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v10
; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[8:9]
-; GFX9-O0-NEXT: ; implicit-def: $sgpr8
-; GFX9-O0-NEXT: ; implicit-def: $sgpr8
; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v4
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v5
@@ -471,15 +435,11 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: v_mov_b32_e32 v6, s11
; GFX9-O0-NEXT: v_mov_b32_e32 v5, s11
; GFX9-O0-NEXT: v_subb_co_u32_e32 v6, vcc, v5, v6, vcc
-; GFX9-O0-NEXT: ; implicit-def: $sgpr8
-; GFX9-O0-NEXT: ; implicit-def: $sgpr8
; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v8
; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: ; implicit-def: $sgpr8
-; GFX9-O0-NEXT: ; implicit-def: $sgpr8
; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v8, v6
; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
@@ -520,8 +480,6 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: s_mov_b64 s[12:13], s[8:9]
; GFX9-O0-NEXT: v_mov_b32_e32 v1, s10
; GFX9-O0-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[12:13]
-; GFX9-O0-NEXT: ; implicit-def: $sgpr12
-; GFX9-O0-NEXT: ; implicit-def: $sgpr12
; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v4
; GFX9-O0-NEXT: s_mov_b64 s[12:13], s[8:9]
@@ -529,8 +487,6 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v3, v4, s[12:13]
; GFX9-O0-NEXT: v_mov_b32_e32 v3, s10
; GFX9-O0-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[8:9]
-; GFX9-O0-NEXT: ; implicit-def: $sgpr8
-; GFX9-O0-NEXT: ; implicit-def: $sgpr8
; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v4
; GFX9-O0-NEXT: s_and_b64 s[6:7], s[4:5], s[6:7]
@@ -765,8 +721,6 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: v_subb_co_u32_e32 v12, vcc, v12, v10, vcc
; GFX9-O0-NEXT: v_subb_co_u32_e32 v11, vcc, v11, v4, vcc
; GFX9-O0-NEXT: v_subb_co_u32_e32 v7, vcc, v7, v5, vcc
-; GFX9-O0-NEXT: ; implicit-def: $sgpr5
-; GFX9-O0-NEXT: ; implicit-def: $sgpr5
; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v12, v7
; GFX9-O0-NEXT: v_ashrrev_i64 v[13:14], s4, v[11:12]
@@ -799,12 +753,8 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: v_subb_co_u32_e32 v10, vcc, v10, v18, vcc
; GFX9-O0-NEXT: v_subb_co_u32_e32 v4, vcc, v4, v11, vcc
; GFX9-O0-NEXT: v_subb_co_u32_e32 v7, vcc, v5, v7, vcc
-; GFX9-O0-NEXT: ; implicit-def: $sgpr4
-; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v7
-; GFX9-O0-NEXT: ; implicit-def: $sgpr4
-; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v10
; GFX9-O0-NEXT: v_mov_b32_e32 v11, v8
@@ -822,12 +772,8 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: v_addc_co_u32_e32 v16, vcc, v10, v11, vcc
; GFX9-O0-NEXT: v_mov_b32_e32 v10, s4
; GFX9-O0-NEXT: v_addc_co_u32_e32 v8, vcc, v8, v10, vcc
-; GFX9-O0-NEXT: ; implicit-def: $sgpr4
-; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; kill: def $vgpr19 killed $vgpr19 def $vgpr19_vgpr20 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v20, v9
-; GFX9-O0-NEXT: ; implicit-def: $sgpr4
-; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v17, v8
; GFX9-O0-NEXT: v_mov_b32_e32 v8, v16
@@ -950,8 +896,6 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[6:7]
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v18
; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[4:5]
-; GFX9-O0-NEXT: ; implicit-def: $sgpr4
-; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v5
; GFX9-O0-NEXT: v_lshrrev_b64 v[4:5], v4, v[14:15]
@@ -964,8 +908,6 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: s_mov_b32 s8, s4
; GFX9-O0-NEXT: v_mov_b32_e32 v4, s8
; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v4, v5, s[6:7]
-; GFX9-O0-NEXT: ; implicit-def: $sgpr6
-; GFX9-O0-NEXT: ; implicit-def: $sgpr6
; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v14
; GFX9-O0-NEXT: v_mov_b32_e32 v15, v12
@@ -983,12 +925,8 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: v_addc_co_u32_e32 v18, vcc, v14, v15, vcc
; GFX9-O0-NEXT: v_mov_b32_e32 v14, s6
; GFX9-O0-NEXT: v_addc_co_u32_e32 v13, vcc, v13, v14, vcc
-; GFX9-O0-NEXT: ; implicit-def: $sgpr6
-; GFX9-O0-NEXT: ; implicit-def: $sgpr6
; GFX9-O0-NEXT: ; k...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/155241
More information about the llvm-commits
mailing list