[llvm] [AMDGPU] Exec-protect regular VGPR spill saves under narrowed exec (PR #184135)

Michael Halkenhäuser via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 2 06:54:01 PST 2026


https://github.com/mhalk updated https://github.com/llvm/llvm-project/pull/184135

>From 9b4c805129bd7fe0ca68b6cb501f61cd8bde052d Mon Sep 17 00:00:00 2001
From: Michael Halkenhaeuser <MichaelGerald.Halkenhauser at amd.com>
Date: Mon, 2 Mar 2026 08:52:55 -0600
Subject: [PATCH] [AMDGPU] Exec-protect regular VGPR spill saves under narrowed
 exec

Changes:
- SIRegisterInfo::eliminateFrameIndex: the save path now gates exec
  protection on a valid SGPRForEXECCopy instead of on
  isWWMRegSpillOpcode; the restore path retains WWM-only exec
  protection.
- SILowerSGPRSpills: keep SGPRForEXECCopy reserved when the function
  has exec-modifying instructions, not only when WWM spills exist.
- SIFrameLowering: extend NeedExecCopyReservedReg to include regular
  VGPR spill pseudos when SGPRForEXECCopy is valid, so the
  reservation survives PrologEpilogInserter. The validity check on
  SGPRForEXECCopy prevents assertion failures when exec is never
  narrowed (i.e. no exec copy SGPR is reserved).

Made-with: Cursor
---
 llvm/lib/Target/AMDGPU/SIFrameLowering.cpp   |  3 ++-
 llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp | 20 ++++++++++++++---
 llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp    | 23 +++++++++++---------
 3 files changed, 32 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index 4a62af56fd8e5..980366a3c5d6e 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -1707,7 +1707,8 @@ void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
     for (MachineInstr &MI : MBB) {
       // TODO: Walking through all MBBs here would be a bad heuristic. Better
       // handle them elsewhere.
-      if (TII->isWWMRegSpillOpcode(MI.getOpcode()))
+      if (TII->isWWMRegSpillOpcode(MI.getOpcode()) ||
+          (TII->isVGPRSpill(MI) && MFI->getSGPRForEXECCopy()))
         NeedExecCopyReservedReg = true;
       else if (MI.getOpcode() == AMDGPU::SI_RETURN ||
                MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG ||
diff --git a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
index 196e551932659..ebb626dc92a96 100644
--- a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
@@ -563,9 +563,23 @@ bool SILowerSGPRSpills::run(MachineFunction &MF) {
                              TRI->getHWRegIndex(FuncInfo->getSGPRForEXECCopy()))
       FuncInfo->setSGPRForEXECCopy(UnusedLowSGPR);
   } else {
-    // No SGPR spills to virtual VGPR lanes and hence there won't be any WWM
-    // spills/copies. Reset the SGPR reserved for EXEC copy.
-    FuncInfo->setSGPRForEXECCopy(AMDGPU::NoRegister);
+    // Keep SGPRForEXECCopy reserved if exec may be narrowed in this function.
+    // Regular VGPR scratch spills require exec protection: memory ops respect
+    // the exec mask, so spills under narrowed exec leave inactive lanes
+    // unwritten, corrupting values on reload under wider exec.
+    bool HasExecModify = false;
+    for (const MachineBasicBlock &MBB : MF) {
+      for (const MachineInstr &MI : MBB) {
+        if (MI.modifiesRegister(AMDGPU::EXEC, TRI)) {
+          HasExecModify = true;
+          break;
+        }
+      }
+      if (HasExecModify)
+        break;
+    }
+    if (!HasExecModify)
+      FuncInfo->setSGPRForEXECCopy(AMDGPU::NoRegister);
   }
 
   SaveBlocks.clear();
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 99eb90b11182d..2c86ab1d7ed01 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -2460,9 +2460,9 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
       }
 
       auto *MBB = MI->getParent();
-      bool IsWWMRegSpill = TII->isWWMRegSpillOpcode(MI->getOpcode());
-      if (IsWWMRegSpill) {
-        TII->insertScratchExecCopy(*MF, *MBB, MI, DL, MFI->getSGPRForEXECCopy(),
+      Register ExecCopyReg = MFI->getSGPRForEXECCopy();
+      if (ExecCopyReg) {
+        TII->insertScratchExecCopy(*MF, *MBB, MI, DL, ExecCopyReg,
                                   RS->isRegUsed(AMDGPU::SCC));
       }
       buildSpillLoadStore(
@@ -2470,8 +2470,8 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
           TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
           *MI->memoperands_begin(), RS);
       MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(*MI, TII));
-      if (IsWWMRegSpill)
-        TII->restoreExec(*MF, *MBB, MI, DL, MFI->getSGPRForEXECCopy());
+      if (ExecCopyReg)
+        TII->restoreExec(*MF, *MBB, MI, DL, ExecCopyReg);
 
       MI->eraseFromParent();
       return true;
@@ -2547,9 +2547,12 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
       }
 
       auto *MBB = MI->getParent();
-      bool IsWWMRegSpill = TII->isWWMRegSpillOpcode(MI->getOpcode());
-      if (IsWWMRegSpill) {
-        TII->insertScratchExecCopy(*MF, *MBB, MI, DL, MFI->getSGPRForEXECCopy(),
+      bool IsWWMRegSpill =
+          TII->isWWMRegSpillOpcode(MI->getOpcode());
+      Register ExecCopyReg =
+          IsWWMRegSpill ? MFI->getSGPRForEXECCopy() : Register();
+      if (ExecCopyReg) {
+        TII->insertScratchExecCopy(*MF, *MBB, MI, DL, ExecCopyReg,
                                    RS->isRegUsed(AMDGPU::SCC));
       }
 
@@ -2558,8 +2561,8 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
           TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
           *MI->memoperands_begin(), RS);
 
-      if (IsWWMRegSpill)
-        TII->restoreExec(*MF, *MBB, MI, DL, MFI->getSGPRForEXECCopy());
+      if (ExecCopyReg)
+        TII->restoreExec(*MF, *MBB, MI, DL, ExecCopyReg);
 
       MI->eraseFromParent();
       return true;



More information about the llvm-commits mailing list