[llvm] r361848 - AMDGPU: Don't enable all lanes with non-CSR VGPR spills

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Tue May 28 09:46:02 PDT 2019


Author: arsenm
Date: Tue May 28 09:46:02 2019
New Revision: 361848

URL: http://llvm.org/viewvc/llvm-project?rev=361848&view=rev
Log:
AMDGPU: Don't enable all lanes with non-CSR VGPR spills

If the only VGPRs used for SGPR spilling were not CSRs, this was
enabling all lanes and immediately restoring exec. This is the usual
situation in leaf functions.

Modified:
    llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.cpp
    llvm/trunk/test/CodeGen/AMDGPU/callee-frame-setup.ll

Modified: llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.cpp?rev=361848&r1=361847&r2=361848&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.cpp Tue May 28 09:46:02 2019
@@ -613,30 +613,36 @@ void SIFrameLowering::emitPrologue(Machi
       .setMIFlag(MachineInstr::FrameSetup);
   }
 
-  if (!FuncInfo->getSGPRSpillVGPRs().empty()) {
-    if (LiveRegs.empty()) {
-      LiveRegs.init(TRI);
-      LiveRegs.addLiveIns(MBB);
+  // To avoid clobbering VGPRs in lanes that weren't active on function entry,
+  // turn on all lanes before doing the spill to memory.
+  unsigned ScratchExecCopy = AMDGPU::NoRegister;
+
+  for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg
+         : FuncInfo->getSGPRSpillVGPRs()) {
+    if (!Reg.FI.hasValue())
+      continue;
+
+    if (ScratchExecCopy == AMDGPU::NoRegister) {
+      if (LiveRegs.empty()) {
+        LiveRegs.init(TRI);
+        LiveRegs.addLiveIns(MBB);
+      }
+
+      ScratchExecCopy
+        = findScratchNonCalleeSaveRegister(MF, LiveRegs,
+                                           AMDGPU::SReg_64_XEXECRegClass);
+
+      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_OR_SAVEEXEC_B64),
+              ScratchExecCopy)
+        .addImm(-1);
     }
 
-    // To avoid clobbering VGPRs in lanes that weren't active on function entry,
-    // turn on all lanes before doing the spill to memory.
-    unsigned ScratchExecCopy
-      = findScratchNonCalleeSaveRegister(MF, LiveRegs,
-                                         AMDGPU::SReg_64_XEXECRegClass);
-
-    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_OR_SAVEEXEC_B64), ScratchExecCopy)
-      .addImm(-1);
-
-    for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg
-           : FuncInfo->getSGPRSpillVGPRs()) {
-      if (!Reg.FI.hasValue())
-        continue;
-      TII->storeRegToStackSlot(MBB, MBBI, Reg.VGPR, true,
-                               Reg.FI.getValue(), &AMDGPU::VGPR_32RegClass,
-                               &TII->getRegisterInfo());
-    }
+    TII->storeRegToStackSlot(MBB, MBBI, Reg.VGPR, true,
+                             Reg.FI.getValue(), &AMDGPU::VGPR_32RegClass,
+                             &TII->getRegisterInfo());
+  }
 
+  if (ScratchExecCopy != AMDGPU::NoRegister) {
     // FIXME: Split block and make terminator.
     BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
       .addReg(ScratchExecCopy);
@@ -654,27 +660,31 @@ void SIFrameLowering::emitEpilogue(Machi
   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
   DebugLoc DL;
 
-  if (!FuncInfo->getSGPRSpillVGPRs().empty()) {
-    // See emitPrologue
-    LivePhysRegs LiveRegs(*ST.getRegisterInfo());
-    LiveRegs.addLiveIns(MBB);
-
-    unsigned ScratchExecCopy
-      = findScratchNonCalleeSaveRegister(MF, LiveRegs,
-                                         AMDGPU::SReg_64_XEXECRegClass);
-
-    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_OR_SAVEEXEC_B64), ScratchExecCopy)
-      .addImm(-1);
-
-    for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg
-           : FuncInfo->getSGPRSpillVGPRs()) {
-      if (!Reg.FI.hasValue())
-        continue;
-      TII->loadRegFromStackSlot(MBB, MBBI, Reg.VGPR,
-                                Reg.FI.getValue(), &AMDGPU::VGPR_32RegClass,
-                                &TII->getRegisterInfo());
+  unsigned ScratchExecCopy = AMDGPU::NoRegister;
+  for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg
+         : FuncInfo->getSGPRSpillVGPRs()) {
+    if (!Reg.FI.hasValue())
+      continue;
+
+    if (ScratchExecCopy == AMDGPU::NoRegister) {
+      // See emitPrologue
+      LivePhysRegs LiveRegs(*ST.getRegisterInfo());
+      LiveRegs.addLiveIns(MBB);
+
+      ScratchExecCopy
+        = findScratchNonCalleeSaveRegister(MF, LiveRegs,
+                                           AMDGPU::SReg_64_XEXECRegClass);
+
+      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_OR_SAVEEXEC_B64), ScratchExecCopy)
+        .addImm(-1);
     }
 
+    TII->loadRegFromStackSlot(MBB, MBBI, Reg.VGPR,
+                              Reg.FI.getValue(), &AMDGPU::VGPR_32RegClass,
+                              &TII->getRegisterInfo());
+  }
+
+  if (ScratchExecCopy != AMDGPU::NoRegister) {
     // FIXME: Split block and make terminator.
     BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
       .addReg(ScratchExecCopy);

Modified: llvm/trunk/test/CodeGen/AMDGPU/callee-frame-setup.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/callee-frame-setup.ll?rev=361848&r1=361847&r2=361848&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/callee-frame-setup.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/callee-frame-setup.ll Tue May 28 09:46:02 2019
@@ -135,5 +135,21 @@ define void @callee_func_sgpr_spill_no_c
   ret void
 }
 
+; Has no spilled CSR VGPRs used for SGPR spilling, so no need to
+; enable all lanes and restore.
+
+; GCN-LABEL: {{^}}spill_only_csr_sgpr:
+; GCN: s_waitcnt
+; GCN-NEXT: v_writelane_b32 v0, s42, 0
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; clobber s42
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_readlane_b32 s42, v0, 0
+; GCN-NEXT: s_setpc_b64
+define void @spill_only_csr_sgpr() {
+  call void asm sideeffect "; clobber s42", "~{s42}"()
+  ret void
+}
+
 attributes #0 = { nounwind }
 attributes #1 = { nounwind "no-frame-pointer-elim"="true" }




More information about the llvm-commits mailing list