[PATCH] D100098: [AMDGPU] Fix computing live registers in prolog
Sebastian Neubauer via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 8 06:04:37 PDT 2021
This revision was automatically updated to reflect the committed changes.
Closed by commit rGc10cc4ea2729: [AMDGPU] Fix computing live registers in prolog (authored by sebastian-ne).
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D100098/new/
https://reviews.llvm.org/D100098
Files:
llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir
Index: llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir
+++ llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir
@@ -79,8 +79,8 @@
; GFX9-FLATSCR-LABEL: name: pei_scavenge_vgpr_spill
; GFX9-FLATSCR: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr2
; GFX9-FLATSCR: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec
- ; GFX9-FLATSCR: $sgpr4 = S_ADD_U32 $sgpr32, 8196, implicit-def $scc
- ; GFX9-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, killed $sgpr4, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.3, addrspace 5)
+ ; GFX9-FLATSCR: $sgpr6 = S_ADD_U32 $sgpr32, 8196, implicit-def $scc
+ ; GFX9-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, killed $sgpr6, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.3, addrspace 5)
; GFX9-FLATSCR: $exec = S_MOV_B64 killed $sgpr4_sgpr5
; GFX9-FLATSCR: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2
; GFX9-FLATSCR: $sgpr33 = frame-setup S_ADD_U32 $sgpr32, 8191, implicit-def $scc
@@ -92,8 +92,8 @@
; GFX9-FLATSCR: $sgpr32 = frame-destroy S_SUB_U32 $sgpr32, 24576, implicit-def $scc
; GFX9-FLATSCR: $sgpr33 = V_READLANE_B32 $vgpr2, 0
; GFX9-FLATSCR: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec
- ; GFX9-FLATSCR: $sgpr4 = S_ADD_U32 $sgpr32, 8196, implicit-def $scc
- ; GFX9-FLATSCR: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR killed $sgpr4, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.3, addrspace 5)
+ ; GFX9-FLATSCR: $sgpr6 = S_ADD_U32 $sgpr32, 8196, implicit-def $scc
+ ; GFX9-FLATSCR: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR killed $sgpr6, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.3, addrspace 5)
; GFX9-FLATSCR: $exec = S_MOV_B64 killed $sgpr4_sgpr5
; GFX9-FLATSCR: S_ENDPGM 0, csr_amdgpu_allvgprs
$vgpr0 = V_MOV_B32_e32 %stack.0, implicit $exec
Index: llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -803,18 +803,12 @@
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIInstrInfo *TII = ST.getInstrInfo();
const SIRegisterInfo &TRI = TII->getRegisterInfo();
- SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
DebugLoc DL;
if (LiveRegs.empty()) {
if (IsProlog) {
LiveRegs.init(TRI);
LiveRegs.addLiveIns(MBB);
- if (FuncInfo->SGPRForFPSaveRestoreCopy)
- LiveRegs.removeReg(FuncInfo->SGPRForFPSaveRestoreCopy);
-
- if (FuncInfo->SGPRForBPSaveRestoreCopy)
- LiveRegs.removeReg(FuncInfo->SGPRForBPSaveRestoreCopy);
} else {
// In epilog.
LiveRegs.init(*ST.getRegisterInfo());
@@ -828,8 +822,7 @@
if (!ScratchExecCopy)
report_fatal_error("failed to find free scratch register");
- if (!IsProlog)
- LiveRegs.removeReg(ScratchExecCopy);
+ LiveRegs.addReg(ScratchExecCopy);
const unsigned OrSaveExec =
ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D100098.336074.patch
Type: text/x-patch
Size: 5227 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20210408/1a626514/attachment.bin>
More information about the llvm-commits
mailing list