[llvm-branch-commits] [llvm] [AMDGPU] Allocate scratch space for dVGPRs for CWSR (PR #130055)
Carl Ritson via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Thu Mar 6 22:10:07 PST 2025
================
@@ -691,17 +691,61 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
}
assert(ScratchWaveOffsetReg || !PreloadedScratchWaveOffsetReg);
- if (hasFP(MF)) {
+ unsigned Offset = FrameInfo.getStackSize() * getScratchScaleFactor(ST);
+ if (!mayReserveScratchForCWSR(MF)) {
+ if (hasFP(MF)) {
+ Register FPReg = MFI->getFrameOffsetReg();
+ assert(FPReg != AMDGPU::FP_REG);
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), FPReg).addImm(0);
+ }
+
+ if (requiresStackPointerReference(MF)) {
+ Register SPReg = MFI->getStackPtrOffsetReg();
+ assert(SPReg != AMDGPU::SP_REG);
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), SPReg).addImm(Offset);
+ }
+ } else {
+ // We need to check if we're on a compute queue - if we are, then the CWSR
+ // trap handler may need to store some VGPRs on the stack. The first VGPR
+ // block is saved separately, so we only need to allocate space for any
+ // additional VGPR blocks used. For now, we will make sure there's enough
+ // room for the theoretical maximum number of VGPRs that can be allocated.
+ // FIXME: Figure out if the shader uses fewer VGPRs in practice.
+ assert(hasFP(MF));
Register FPReg = MFI->getFrameOffsetReg();
assert(FPReg != AMDGPU::FP_REG);
- BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), FPReg).addImm(0);
- }
-
- if (requiresStackPointerReference(MF)) {
Register SPReg = MFI->getStackPtrOffsetReg();
assert(SPReg != AMDGPU::SP_REG);
- BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), SPReg)
- .addImm(FrameInfo.getStackSize() * getScratchScaleFactor(ST));
+ unsigned VGPRSize =
+ llvm::alignTo((ST.getAddressableNumVGPRs() -
+ AMDGPU::IsaInfo::getVGPRAllocGranule(&ST)) *
+ 4,
+ FrameInfo.getMaxAlign());
+ MFI->setScratchReservedForDynamicVGPRs(VGPRSize);
+
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_GETREG_B32), FPReg)
+ .addImm(AMDGPU::Hwreg::HwregEncoding::encode(
+ AMDGPU::Hwreg::ID_HW_ID2, AMDGPU::Hwreg::OFFSET_ME_ID, 1));
----------------
perlfu wrote:
Do you not need to retrieve 2 bits here, i.e. pass 2 rather than 1 as the last argument?
AMDGPU::Hwreg::HwregEncoding::encode(AMDGPU::Hwreg::ID_HW_ID2, AMDGPU::Hwreg::OFFSET_ME_ID, **2**)
https://github.com/llvm/llvm-project/pull/130055
More information about the llvm-branch-commits mailing list