[llvm] 6a74810 - [AMDGPU] Fix RP tracker's live registers after processing a memory clause.
Valery Pykhtin via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 1 03:48:14 PDT 2022
Author: Valery Pykhtin
Date: 2022-11-01T11:47:59+01:00
New Revision: 6a748100a02a8cf87363c4db91b6dd3bc2166d31
URL: https://github.com/llvm/llvm-project/commit/6a748100a02a8cf87363c4db91b6dd3bc2166d31
DIFF: https://github.com/llvm/llvm-project/commit/6a748100a02a8cf87363c4db91b6dd3bc2166d31.diff
LOG: [AMDGPU] Fix RP tracker's live registers after processing a memory clause.
It's incorrect to reuse live registers left from the first instruction in a clause after the clause as they don't contain in-clause defs.
Reviewed By: arsenm
Differential Revision: https://reviews.llvm.org/D137081
Added:
Modified:
llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
llvm/test/CodeGen/AMDGPU/spill-vgpr.ll
llvm/test/CodeGen/AMDGPU/unallocatable-bundle-regression.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp b/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
index d7ca7f36284bf..471d70a95ffdc 100644
--- a/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
@@ -393,13 +393,11 @@ bool SIFormMemoryClauses::runOnMachineFunction(MachineFunction &MF) {
Ind->insertMachineInstrInMaps(*Kill);
}
- if (!Kill) {
- RPT.reset(MI, &LiveRegsCopy);
- continue;
- }
-
// Restore the state after processing the end of the bundle.
- RPT.reset(*Kill, &LiveRegsCopy);
+ RPT.reset(MI, &LiveRegsCopy);
+
+ if (!Kill)
+ continue;
for (auto &&R : Defs) {
Register Reg = R.first;
diff --git a/llvm/test/CodeGen/AMDGPU/spill-vgpr.ll b/llvm/test/CodeGen/AMDGPU/spill-vgpr.ll
index ea2b5f379ac5e..e892f4176fc6c 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-vgpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-vgpr.ll
@@ -85,12 +85,13 @@ define amdgpu_kernel void @max_10_vgprs_spill_v32(<32 x float> addrspace(1)* %p)
; GFX908-DAG: v_accvgpr_read_b32
; GFX900: NumVgprs: 256
-; GFX900: ScratchSize: 148
-; GFX908: NumVgprs: 254
+; GFX900: ScratchSize: 132
+; GFX908: NumVgprs: 252
; GFX908: ScratchSize: 0
-; GCN: VGPRBlocks: 63
+; GCN900: VGPRBlocks: 63
+; GCN908: VGPRBlocks: 62
; GFX900: NumVGPRsForWavesPerEU: 256
-; GFX908: NumVGPRsForWavesPerEU: 254
+; GFX908: NumVGPRsForWavesPerEU: 252
define amdgpu_kernel void @max_256_vgprs_spill_9x32(<32 x float> addrspace(1)* %p) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%p1 = getelementptr inbounds <32 x float>, <32 x float> addrspace(1)* %p, i32 %tid
@@ -135,13 +136,13 @@ define amdgpu_kernel void @max_256_vgprs_spill_9x32(<32 x float> addrspace(1)* %
; GFX908-DAG: v_accvgpr_read_b32
; GFX900: NumVgprs: 256
-; GFX908: NumVgprs: 254
-; GFX900: ScratchSize: 1796
+; GFX908: NumVgprs: 252
+; GFX900: ScratchSize: 1668
; GFX908: ScratchSize: 0
; GFX900: VGPRBlocks: 63
-; GFX908: VGPRBlocks: 63
+; GFX908: VGPRBlocks: 62
; GFX900: NumVGPRsForWavesPerEU: 256
-; GFX908: NumVGPRsForWavesPerEU: 25
+; GFX908: NumVGPRsForWavesPerEU: 252
define amdgpu_kernel void @max_256_vgprs_spill_9x32_2bb(<32 x float> addrspace(1)* %p) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%p1 = getelementptr inbounds <32 x float>, <32 x float> addrspace(1)* %p, i32 %tid
diff --git a/llvm/test/CodeGen/AMDGPU/unallocatable-bundle-regression.mir b/llvm/test/CodeGen/AMDGPU/unallocatable-bundle-regression.mir
index 97c63bdfcd1a7..676cbc503c47d 100644
--- a/llvm/test/CodeGen/AMDGPU/unallocatable-bundle-regression.mir
+++ b/llvm/test/CodeGen/AMDGPU/unallocatable-bundle-regression.mir
@@ -52,13 +52,11 @@ body: |
; CHECK-NEXT: renamable $sgpr0 = S_MOV_B32 1264
; CHECK-NEXT: renamable $sgpr1 = COPY renamable $sgpr5
; CHECK-NEXT: renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1216, 0 :: (dereferenceable load (s256), addrspace 6)
- ; CHECK-NEXT: renamable $sgpr88_sgpr89_sgpr90_sgpr91 = S_LOAD_DWORDX4_IMM renamable $sgpr0_sgpr1, 0, 0 :: (load (s128), addrspace 6)
- ; CHECK-NEXT: KILL killed renamable $sgpr0, renamable $sgpr1
+ ; CHECK-NEXT: renamable $sgpr88_sgpr89_sgpr90_sgpr91 = S_LOAD_DWORDX4_IMM killed renamable $sgpr0_sgpr1, 0, 0 :: (load (s128), addrspace 6)
; CHECK-NEXT: renamable $sgpr0 = S_MOV_B32 1328
; CHECK-NEXT: renamable $sgpr1 = COPY renamable $sgpr5
; CHECK-NEXT: renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1280, 0 :: (dereferenceable load (s256), addrspace 6)
- ; CHECK-NEXT: renamable $sgpr92_sgpr93_sgpr94_sgpr95 = S_LOAD_DWORDX4_IMM renamable $sgpr0_sgpr1, 0, 0 :: (load (s128), addrspace 6)
- ; CHECK-NEXT: KILL killed renamable $sgpr0, renamable $sgpr1
+ ; CHECK-NEXT: renamable $sgpr92_sgpr93_sgpr94_sgpr95 = S_LOAD_DWORDX4_IMM killed renamable $sgpr0_sgpr1, 0, 0 :: (load (s128), addrspace 6)
; CHECK-NEXT: renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1344, 0 :: (dereferenceable load (s256), addrspace 6)
; CHECK-NEXT: renamable $sgpr0 = S_MOV_B32 1392
; CHECK-NEXT: renamable $sgpr1 = COPY renamable $sgpr5
@@ -73,25 +71,19 @@ body: |
; CHECK-NEXT: renamable $vgpr7 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, killed renamable $sgpr76_sgpr77_sgpr78_sgpr79, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource")
; CHECK-NEXT: renamable $vgpr8 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, killed renamable $sgpr80_sgpr81_sgpr82_sgpr83, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource")
; CHECK-NEXT: renamable $vgpr9 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, killed renamable $sgpr84_sgpr85_sgpr86_sgpr87, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource")
- ; CHECK-NEXT: renamable $vgpr10 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43, renamable $sgpr88_sgpr89_sgpr90_sgpr91, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource")
- ; CHECK-NEXT: renamable $vgpr11 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51, renamable $sgpr92_sgpr93_sgpr94_sgpr95, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource")
- ; CHECK-NEXT: renamable $vgpr12 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67, renamable $sgpr96_sgpr97_sgpr98_sgpr99, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource")
+ ; CHECK-NEXT: renamable $vgpr10 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43, killed renamable $sgpr88_sgpr89_sgpr90_sgpr91, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource")
+ ; CHECK-NEXT: renamable $vgpr11 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51, killed renamable $sgpr92_sgpr93_sgpr94_sgpr95, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource")
+ ; CHECK-NEXT: renamable $vgpr12 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67, killed renamable $sgpr96_sgpr97_sgpr98_sgpr99, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource")
; CHECK-NEXT: renamable $vgpr13 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, renamable $sgpr4_sgpr5_sgpr6_sgpr7, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource")
; CHECK-NEXT: renamable $vgpr14 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource")
; CHECK-NEXT: renamable $sgpr8_sgpr9_sgpr10_sgpr11 = SI_SPILL_S128_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s128) from %stack.0, align 4, addrspace 5)
; CHECK-NEXT: renamable $vgpr1_vgpr2_vgpr3_vgpr4 = BUFFER_LOAD_FORMAT_XYZW_IDXEN renamable $vgpr0, renamable $sgpr8_sgpr9_sgpr10_sgpr11, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource", align 1, addrspace 4)
; CHECK-NEXT: KILL killed renamable $sgpr4_sgpr5_sgpr6_sgpr7
- ; CHECK-NEXT: KILL killed renamable $sgpr92_sgpr93_sgpr94_sgpr95
; CHECK-NEXT: KILL killed renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75
- ; CHECK-NEXT: KILL killed renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67
- ; CHECK-NEXT: KILL killed renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43
; CHECK-NEXT: KILL killed renamable $vgpr5_vgpr6
; CHECK-NEXT: KILL killed renamable $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: KILL killed renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59
- ; CHECK-NEXT: KILL killed renamable $sgpr96_sgpr97_sgpr98_sgpr99
- ; CHECK-NEXT: KILL killed renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51
; CHECK-NEXT: KILL killed renamable $sgpr8_sgpr9_sgpr10_sgpr11
- ; CHECK-NEXT: KILL killed renamable $sgpr88_sgpr89_sgpr90_sgpr91
; CHECK-NEXT: KILL killed renamable $vgpr0
; CHECK-NEXT: renamable $vgpr0 = nofpexcept V_MAX_F32_e32 killed $vgpr7, killed $vgpr8, implicit $mode, implicit $exec
; CHECK-NEXT: renamable $vgpr0 = V_MAX3_F32_e64 0, killed $vgpr0, 0, killed $vgpr9, 0, killed $vgpr10, 0, 0, implicit $mode, implicit $exec
More information about the llvm-commits
mailing list