[PATCH] D100098: [AMDGPU] Fix computing live registers in prolog

Sebastian Neubauer via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Thu Apr 8 05:07:24 PDT 2021


sebastian-ne created this revision.
sebastian-ne added reviewers: scott.linder, arsenm.
Herald added subscribers: kerbowa, hiraditya, t-tye, tpr, dstuttard, yaxunl, nhaehnle, jvesely, kzhuravl, qcolombet.
sebastian-ne requested review of this revision.
Herald added subscribers: llvm-commits, wdng.
Herald added a project: LLVM.

ScratchExecCopy needs to be marked as live, we cannot use that register
while EXEC is stored in there.

Marking SGPRForFPSaveRestoreCopy and SGPRForBPSaveRestoreCopy as
available is unnecessary, they should not be live at that point anway.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D100098

Files:
  llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
  llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir


Index: llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir
+++ llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir
@@ -79,8 +79,8 @@
     ; GFX9-FLATSCR-LABEL: name: pei_scavenge_vgpr_spill
     ; GFX9-FLATSCR: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr2
     ; GFX9-FLATSCR: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec
-    ; GFX9-FLATSCR: $sgpr4 = S_ADD_U32 $sgpr32, 8196, implicit-def $scc
-    ; GFX9-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, killed $sgpr4, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.3, addrspace 5)
+    ; GFX9-FLATSCR: $sgpr6 = S_ADD_U32 $sgpr32, 8196, implicit-def $scc
+    ; GFX9-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, killed $sgpr6, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.3, addrspace 5)
     ; GFX9-FLATSCR: $exec = S_MOV_B64 killed $sgpr4_sgpr5
     ; GFX9-FLATSCR: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2
     ; GFX9-FLATSCR: $sgpr33 = frame-setup S_ADD_U32 $sgpr32, 8191, implicit-def $scc
@@ -92,8 +92,8 @@
     ; GFX9-FLATSCR: $sgpr32 = frame-destroy S_SUB_U32 $sgpr32, 24576, implicit-def $scc
     ; GFX9-FLATSCR: $sgpr33 = V_READLANE_B32 $vgpr2, 0
     ; GFX9-FLATSCR: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec
-    ; GFX9-FLATSCR: $sgpr4 = S_ADD_U32 $sgpr32, 8196, implicit-def $scc
-    ; GFX9-FLATSCR: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR killed $sgpr4, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.3, addrspace 5)
+    ; GFX9-FLATSCR: $sgpr6 = S_ADD_U32 $sgpr32, 8196, implicit-def $scc
+    ; GFX9-FLATSCR: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR killed $sgpr6, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.3, addrspace 5)
     ; GFX9-FLATSCR: $exec = S_MOV_B64 killed $sgpr4_sgpr5
     ; GFX9-FLATSCR: S_ENDPGM 0, csr_amdgpu_allvgprs
     $vgpr0 = V_MOV_B32_e32 %stack.0, implicit $exec
Index: llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -803,18 +803,12 @@
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   const SIInstrInfo *TII = ST.getInstrInfo();
   const SIRegisterInfo &TRI = TII->getRegisterInfo();
-  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
   DebugLoc DL;
 
   if (LiveRegs.empty()) {
     if (IsProlog) {
       LiveRegs.init(TRI);
       LiveRegs.addLiveIns(MBB);
-      if (FuncInfo->SGPRForFPSaveRestoreCopy)
-        LiveRegs.removeReg(FuncInfo->SGPRForFPSaveRestoreCopy);
-
-      if (FuncInfo->SGPRForBPSaveRestoreCopy)
-        LiveRegs.removeReg(FuncInfo->SGPRForBPSaveRestoreCopy);
     } else {
       // In epilog.
       LiveRegs.init(*ST.getRegisterInfo());
@@ -828,8 +822,7 @@
   if (!ScratchExecCopy)
     report_fatal_error("failed to find free scratch register");
 
-  if (!IsProlog)
-    LiveRegs.removeReg(ScratchExecCopy);
+  LiveRegs.addReg(ScratchExecCopy);
 
   const unsigned OrSaveExec =
       ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D100098.336065.patch
Type: text/x-patch
Size: 5227 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20210408/88f16da6/attachment-0001.bin>


More information about the llvm-commits mailing list