[llvm] 75d6737 - AMDGPU: Fix clobbering temp reg for large frame indexes in VOP3 users (#114924)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 5 07:37:26 PST 2024
Author: Matt Arsenault
Date: 2024-11-05T07:37:23-08:00
New Revision: 75d673718a5d67a7b9a4d622466cd07927549ba8
URL: https://github.com/llvm/llvm-project/commit/75d673718a5d67a7b9a4d622466cd07927549ba8
DIFF: https://github.com/llvm/llvm-project/commit/75d673718a5d67a7b9a4d622466cd07927549ba8.diff
LOG: AMDGPU: Fix clobbering temp reg for large frame indexes in VOP3 users (#114924)
For a VOP3 instruction that does not permit a literal operand with an
SGPR operand, this would re-use the same scratch register for both
operands,
clobbering the original value.
Added:
Modified:
llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir
llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-u32.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 8b8884a903caf4..707468892d1779 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -2628,8 +2628,10 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
std::swap(FIOperandNum, OtherOpIdx);
}
- for (unsigned SrcIdx : {Src1Idx, Src0Idx}) {
- // Depending on operand constraints we may need to insert another copy.
+ // We need at most one mov to satisfy the operand constraints. Prefer to
+ // move the FI operand first, as it may be a literal in a VOP3
+ // instruction.
+ for (unsigned SrcIdx : {FIOperandNum, OtherOpIdx}) {
if (!TII->isOperandLegal(*MI, SrcIdx)) {
// If commuting didn't make the operands legal, we need to materialize
// in a register.
@@ -2648,6 +2650,7 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
Src.ChangeToRegister(ScavengedVGPR, false);
Src.setIsKill(true);
+ break;
}
}
diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir
index f6b0405d5b0097..fc6cd74bf052ca 100644
--- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir
+++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir
@@ -1828,3 +1828,211 @@ body: |
SI_RETURN implicit $vgpr0, implicit $sgpr4_sgpr5
...
+
+---
+name: v_add_co_u32_e64_fi_sgpr_clobbered_register
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 32768, alignment: 4, local-offset: 0 }
+ - { id: 1, size: 32768, alignment: 4, local-offset: 32768 }
+machineFunctionInfo:
+ isEntryFunction: true
+ scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0:
+ liveins: $sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7:0x000000000000003C
+
+ ; GFX7-LABEL: name: v_add_co_u32_e64_fi_sgpr_clobbered_register
+ ; GFX7: liveins: $sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7:0x000000000000003C, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: $sgpr8_sgpr9_sgpr10_sgpr11 = COPY killed $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX7-NEXT: $sgpr8 = S_ADD_U32 $sgpr8, $noreg, implicit-def $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
+ ; GFX7-NEXT: $sgpr9 = S_ADDC_U32 $sgpr9, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
+ ; GFX7-NEXT: renamable $sgpr0 = S_LSHL_B32 renamable $sgpr6, 2, implicit-def dead $scc
+ ; GFX7-NEXT: $vgpr1 = V_MOV_B32_e32 32772, implicit $exec
+ ; GFX7-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, killed $sgpr0, 0, implicit $exec
+ ; GFX7-NEXT: renamable $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed renamable $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5)
+ ; GFX7-NEXT: S_ENDPGM 0
+ ;
+ ; GFX8-LABEL: name: v_add_co_u32_e64_fi_sgpr_clobbered_register
+ ; GFX8: liveins: $sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7:0x000000000000003C, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: $sgpr8_sgpr9_sgpr10_sgpr11 = COPY killed $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX8-NEXT: $sgpr8 = S_ADD_U32 $sgpr8, $noreg, implicit-def $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
+ ; GFX8-NEXT: $sgpr9 = S_ADDC_U32 $sgpr9, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
+ ; GFX8-NEXT: renamable $sgpr0 = S_LSHL_B32 renamable $sgpr6, 2, implicit-def dead $scc
+ ; GFX8-NEXT: $vgpr1 = V_MOV_B32_e32 32772, implicit $exec
+ ; GFX8-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, killed $sgpr0, 0, implicit $exec
+ ; GFX8-NEXT: renamable $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed renamable $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5)
+ ; GFX8-NEXT: S_ENDPGM 0
+ ;
+ ; GFX900-LABEL: name: v_add_co_u32_e64_fi_sgpr_clobbered_register
+ ; GFX900: liveins: $sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7:0x000000000000003C, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX900-NEXT: {{ $}}
+ ; GFX900-NEXT: $sgpr8_sgpr9_sgpr10_sgpr11 = COPY killed $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX900-NEXT: $sgpr8 = S_ADD_U32 $sgpr8, $noreg, implicit-def $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
+ ; GFX900-NEXT: $sgpr9 = S_ADDC_U32 $sgpr9, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
+ ; GFX900-NEXT: renamable $sgpr0 = S_LSHL_B32 renamable $sgpr6, 2, implicit-def dead $scc
+ ; GFX900-NEXT: $vgpr1 = V_MOV_B32_e32 32772, implicit $exec
+ ; GFX900-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, killed $sgpr0, 0, implicit $exec
+ ; GFX900-NEXT: renamable $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed renamable $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5)
+ ; GFX900-NEXT: S_ENDPGM 0
+ ;
+ ; GFX90A-LABEL: name: v_add_co_u32_e64_fi_sgpr_clobbered_register
+ ; GFX90A: liveins: $sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7:0x000000000000003C, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: $sgpr8_sgpr9_sgpr10_sgpr11 = COPY killed $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: $sgpr8 = S_ADD_U32 $sgpr8, $noreg, implicit-def $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
+ ; GFX90A-NEXT: $sgpr9 = S_ADDC_U32 $sgpr9, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
+ ; GFX90A-NEXT: renamable $sgpr0 = S_LSHL_B32 renamable $sgpr6, 2, implicit-def dead $scc
+ ; GFX90A-NEXT: $vgpr1 = V_MOV_B32_e32 32772, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, killed $sgpr0, 0, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed renamable $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5)
+ ; GFX90A-NEXT: S_ENDPGM 0
+ ;
+ ; GFX10-LABEL: name: v_add_co_u32_e64_fi_sgpr_clobbered_register
+ ; GFX10: liveins: $sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7:0x000000000000003C, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: $sgpr96_sgpr97_sgpr98_sgpr99 = COPY killed $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX10-NEXT: $sgpr96 = S_ADD_U32 $sgpr96, $noreg, implicit-def $scc, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99
+ ; GFX10-NEXT: $sgpr97 = S_ADDC_U32 $sgpr97, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99
+ ; GFX10-NEXT: renamable $sgpr0 = S_LSHL_B32 renamable $sgpr6, 2, implicit-def dead $scc
+ ; GFX10-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 32772, killed $sgpr0, 0, implicit $exec
+ ; GFX10-NEXT: renamable $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed renamable $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5)
+ ; GFX10-NEXT: S_ENDPGM 0
+ ;
+ ; GFX940-LABEL: name: v_add_co_u32_e64_fi_sgpr_clobbered_register
+ ; GFX940: liveins: $sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7:0x000000000000003C
+ ; GFX940-NEXT: {{ $}}
+ ; GFX940-NEXT: renamable $sgpr0 = S_LSHL_B32 renamable $sgpr6, 2, implicit-def dead $scc
+ ; GFX940-NEXT: $vgpr1 = V_MOV_B32_e32 32772, implicit $exec
+ ; GFX940-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, killed $sgpr0, 0, implicit $exec
+ ; GFX940-NEXT: renamable $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed renamable $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5)
+ ; GFX940-NEXT: S_ENDPGM 0
+ ;
+ ; GFX11-LABEL: name: v_add_co_u32_e64_fi_sgpr_clobbered_register
+ ; GFX11: liveins: $sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7:0x000000000000003C
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: renamable $sgpr0 = S_LSHL_B32 renamable $sgpr6, 2, implicit-def dead $scc
+ ; GFX11-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 32772, killed $sgpr0, 0, implicit $exec
+ ; GFX11-NEXT: renamable $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed renamable $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5)
+ ; GFX11-NEXT: S_ENDPGM 0
+ ;
+ ; GFX12-LABEL: name: v_add_co_u32_e64_fi_sgpr_clobbered_register
+ ; GFX12: liveins: $sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7:0x000000000000003C
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: renamable $sgpr0 = S_LSHL_B32 renamable $sgpr6, 2, implicit-def dead $scc
+ ; GFX12-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 32768, killed $sgpr0, 0, implicit $exec
+ ; GFX12-NEXT: renamable $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed renamable $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5)
+ ; GFX12-NEXT: S_ENDPGM 0
+ renamable $sgpr0 = S_LSHL_B32 renamable $sgpr6, 2, implicit-def dead $scc
+ renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 %stack.1, killed $sgpr0, 0, implicit $exec
+ renamable $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed renamable $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5)
+ S_ENDPGM 0
+
+...
+
+---
+name: v_add_co_u32_e64_sgpr_fi_clobbered_register
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 32768, alignment: 4, local-offset: 0 }
+ - { id: 1, size: 32768, alignment: 4, local-offset: 32768 }
+machineFunctionInfo:
+ isEntryFunction: true
+ scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0:
+ liveins: $sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7:0x000000000000003C
+
+ ; GFX7-LABEL: name: v_add_co_u32_e64_sgpr_fi_clobbered_register
+ ; GFX7: liveins: $sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7:0x000000000000003C, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: $sgpr8_sgpr9_sgpr10_sgpr11 = COPY killed $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX7-NEXT: $sgpr8 = S_ADD_U32 $sgpr8, $noreg, implicit-def $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
+ ; GFX7-NEXT: $sgpr9 = S_ADDC_U32 $sgpr9, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
+ ; GFX7-NEXT: renamable $sgpr0 = S_LSHL_B32 renamable $sgpr6, 2, implicit-def dead $scc
+ ; GFX7-NEXT: $vgpr1 = V_MOV_B32_e32 32772, implicit $exec
+ ; GFX7-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, killed $sgpr0, 0, implicit $exec
+ ; GFX7-NEXT: renamable $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed renamable $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5)
+ ; GFX7-NEXT: S_ENDPGM 0
+ ;
+ ; GFX8-LABEL: name: v_add_co_u32_e64_sgpr_fi_clobbered_register
+ ; GFX8: liveins: $sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7:0x000000000000003C, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: $sgpr8_sgpr9_sgpr10_sgpr11 = COPY killed $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX8-NEXT: $sgpr8 = S_ADD_U32 $sgpr8, $noreg, implicit-def $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
+ ; GFX8-NEXT: $sgpr9 = S_ADDC_U32 $sgpr9, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
+ ; GFX8-NEXT: renamable $sgpr0 = S_LSHL_B32 renamable $sgpr6, 2, implicit-def dead $scc
+ ; GFX8-NEXT: $vgpr1 = V_MOV_B32_e32 32772, implicit $exec
+ ; GFX8-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, killed $sgpr0, 0, implicit $exec
+ ; GFX8-NEXT: renamable $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed renamable $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5)
+ ; GFX8-NEXT: S_ENDPGM 0
+ ;
+ ; GFX900-LABEL: name: v_add_co_u32_e64_sgpr_fi_clobbered_register
+ ; GFX900: liveins: $sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7:0x000000000000003C, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX900-NEXT: {{ $}}
+ ; GFX900-NEXT: $sgpr8_sgpr9_sgpr10_sgpr11 = COPY killed $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX900-NEXT: $sgpr8 = S_ADD_U32 $sgpr8, $noreg, implicit-def $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
+ ; GFX900-NEXT: $sgpr9 = S_ADDC_U32 $sgpr9, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
+ ; GFX900-NEXT: renamable $sgpr0 = S_LSHL_B32 renamable $sgpr6, 2, implicit-def dead $scc
+ ; GFX900-NEXT: $vgpr1 = V_MOV_B32_e32 32772, implicit $exec
+ ; GFX900-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, killed $sgpr0, 0, implicit $exec
+ ; GFX900-NEXT: renamable $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed renamable $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5)
+ ; GFX900-NEXT: S_ENDPGM 0
+ ;
+ ; GFX90A-LABEL: name: v_add_co_u32_e64_sgpr_fi_clobbered_register
+ ; GFX90A: liveins: $sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7:0x000000000000003C, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: $sgpr8_sgpr9_sgpr10_sgpr11 = COPY killed $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: $sgpr8 = S_ADD_U32 $sgpr8, $noreg, implicit-def $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
+ ; GFX90A-NEXT: $sgpr9 = S_ADDC_U32 $sgpr9, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
+ ; GFX90A-NEXT: renamable $sgpr0 = S_LSHL_B32 renamable $sgpr6, 2, implicit-def dead $scc
+ ; GFX90A-NEXT: $vgpr1 = V_MOV_B32_e32 32772, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, killed $sgpr0, 0, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed renamable $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5)
+ ; GFX90A-NEXT: S_ENDPGM 0
+ ;
+ ; GFX10-LABEL: name: v_add_co_u32_e64_sgpr_fi_clobbered_register
+ ; GFX10: liveins: $sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7:0x000000000000003C, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: $sgpr96_sgpr97_sgpr98_sgpr99 = COPY killed $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX10-NEXT: $sgpr96 = S_ADD_U32 $sgpr96, $noreg, implicit-def $scc, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99
+ ; GFX10-NEXT: $sgpr97 = S_ADDC_U32 $sgpr97, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99
+ ; GFX10-NEXT: renamable $sgpr0 = S_LSHL_B32 renamable $sgpr6, 2, implicit-def dead $scc
+ ; GFX10-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 32772, killed $sgpr0, 0, implicit $exec
+ ; GFX10-NEXT: renamable $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed renamable $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5)
+ ; GFX10-NEXT: S_ENDPGM 0
+ ;
+ ; GFX940-LABEL: name: v_add_co_u32_e64_sgpr_fi_clobbered_register
+ ; GFX940: liveins: $sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7:0x000000000000003C
+ ; GFX940-NEXT: {{ $}}
+ ; GFX940-NEXT: renamable $sgpr0 = S_LSHL_B32 renamable $sgpr6, 2, implicit-def dead $scc
+ ; GFX940-NEXT: $vgpr1 = V_MOV_B32_e32 32772, implicit $exec
+ ; GFX940-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, killed $sgpr0, 0, implicit $exec
+ ; GFX940-NEXT: renamable $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed renamable $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5)
+ ; GFX940-NEXT: S_ENDPGM 0
+ ;
+ ; GFX11-LABEL: name: v_add_co_u32_e64_sgpr_fi_clobbered_register
+ ; GFX11: liveins: $sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7:0x000000000000003C
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: renamable $sgpr0 = S_LSHL_B32 renamable $sgpr6, 2, implicit-def dead $scc
+ ; GFX11-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 32772, killed $sgpr0, 0, implicit $exec
+ ; GFX11-NEXT: renamable $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed renamable $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5)
+ ; GFX11-NEXT: S_ENDPGM 0
+ ;
+ ; GFX12-LABEL: name: v_add_co_u32_e64_sgpr_fi_clobbered_register
+ ; GFX12: liveins: $sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7:0x000000000000003C
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: renamable $sgpr0 = S_LSHL_B32 renamable $sgpr6, 2, implicit-def dead $scc
+ ; GFX12-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 32768, killed $sgpr0, 0, implicit $exec
+ ; GFX12-NEXT: renamable $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed renamable $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5)
+ ; GFX12-NEXT: S_ENDPGM 0
+ renamable $sgpr0 = S_LSHL_B32 renamable $sgpr6, 2, implicit-def dead $scc
+ renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 %stack.1, killed $sgpr0, 0, implicit $exec
+ renamable $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed renamable $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5)
+ S_ENDPGM 0
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-u32.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-u32.mir
index 7c25e5fd9c6645..9c2fef05124d7f 100644
--- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-u32.mir
+++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-u32.mir
@@ -1220,9 +1220,8 @@ body: |
; MUBUF-NEXT: {{ $}}
; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
- ; MUBUF-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr8, implicit $exec
; MUBUF-NEXT: $vgpr1 = V_MOV_B32_e32 72, implicit $exec
- ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e64 killed $vgpr1, killed $vgpr1, 0, implicit $exec
+ ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e64 killed $vgpr1, $sgpr8, 0, implicit $exec
; MUBUF-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8
;
; MUBUFW32-LABEL: name: v_add_u32_e64__kernel_fi_offset72__sgpr_live_after
@@ -1236,9 +1235,8 @@ body: |
; FLATSCRW64-LABEL: name: v_add_u32_e64__kernel_fi_offset72__sgpr_live_after
; FLATSCRW64: liveins: $sgpr8
; FLATSCRW64-NEXT: {{ $}}
- ; FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr8, implicit $exec
; FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 72, implicit $exec
- ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e64 killed $vgpr1, killed $vgpr1, 0, implicit $exec
+ ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e64 killed $vgpr1, $sgpr8, 0, implicit $exec
; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8
;
; FLATSCRW32-LABEL: name: v_add_u32_e64__kernel_fi_offset72__sgpr_live_after
More information about the llvm-commits
mailing list