[llvm] [AMDGPU] Generate waterfall for calls with SGPR(inreg) argument (PR #146997)
Juan Manuel Martinez Caamaño via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 31 01:48:46 PDT 2025
================
@@ -7846,26 +7889,57 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
Register DstReg = Inst.getOperand(0).getReg();
const TargetRegisterClass *NewDstRC = getDestEquivalentVGPRClass(Inst);
- // If it's a copy of a VGPR to a physical SGPR, insert a V_READFIRSTLANE and
- // hope for the best.
if (Inst.isCopy() && DstReg.isPhysical() &&
RI.isVGPR(MRI, Inst.getOperand(1).getReg())) {
- // TODO: Only works for 32 bit registers.
- if (MRI.constrainRegClass(DstReg, &AMDGPU::SReg_32_XM0RegClass)) {
- BuildMI(*Inst.getParent(), &Inst, Inst.getDebugLoc(),
- get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
- .add(Inst.getOperand(1));
- } else {
- Register NewDst =
- MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
- BuildMI(*Inst.getParent(), &Inst, Inst.getDebugLoc(),
- get(AMDGPU::V_READFIRSTLANE_B32), NewDst)
- .add(Inst.getOperand(1));
- BuildMI(*Inst.getParent(), &Inst, Inst.getDebugLoc(), get(AMDGPU::COPY),
- DstReg)
- .addReg(NewDst);
+ const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
+ Register SrcReg = Inst.getOperand(1).getReg();
+ MachineBasicBlock::iterator I = Inst.getIterator();
+ MachineBasicBlock::iterator E = Inst.getParent()->end();
+ // Only search current block since phyreg's def & use cannot cross
+ // blocks when MF.NoPhi = false.
+ while (++I != E) {
+ // Currently, we only support waterfall on SI_CALL_ISEL.
+ if (I->getOpcode() == AMDGPU::SI_CALL_ISEL) {
+ MachineInstr *UseMI = &*I;
+ for (unsigned i = 0; i < UseMI->getNumOperands(); ++i) {
+ if (UseMI->getOperand(i).isReg() &&
+ UseMI->getOperand(i).getReg() == DstReg) {
+ MachineOperand *MO = &UseMI->getOperand(i);
+ MO->setReg(SrcReg);
+ V2PhysSCopyInfo &V2SCopyInfo = Worklist.WaterFalls[UseMI];
+ V2SCopyInfo.MOs.push_back(MO);
+ V2SCopyInfo.SGPRs.push_back(DstReg);
+ Worklist.V2PhySCopiesToErase[&Inst] = true;
+ }
+ }
+ } else if ((I->getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG &&
+ I->getOperand(0).isReg() &&
+ I->getOperand(0).getReg() == DstReg) ||
+ DstReg == AMDGPU::M0) {
+ // If it's a copy of a VGPR to a physical SGPR, insert a
+ // V_READFIRSTLANE and hope for the best.
+ // TODO: Only works for 32 bit registers.
+ if (MRI.constrainRegClass(DstReg, &AMDGPU::SReg_32_XM0RegClass)) {
+ BuildMI(*Inst.getParent(), &Inst, Inst.getDebugLoc(),
+ get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
+ .add(Inst.getOperand(1));
+ } else {
+ Register NewDst =
+ MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+ BuildMI(*Inst.getParent(), &Inst, Inst.getDebugLoc(),
+ get(AMDGPU::V_READFIRSTLANE_B32), NewDst)
+ .add(Inst.getOperand(1));
+ BuildMI(*Inst.getParent(), &Inst, Inst.getDebugLoc(),
+ get(AMDGPU::COPY), DstReg)
+ .addReg(NewDst);
+ }
+ Worklist.V2PhySCopiesToErase[&Inst] = true;
----------------
jmmartinez wrote:
Is it possible for `V2PhySCopiesToErase[&Inst]` to already be `false`, in which case this assignment would override it to `true`?
I think what you want is `V2PhySCopiesToErase.try_emplace(&Inst, true)`, which sets `V2PhySCopiesToErase[&Inst]` to `true` only if no entry exists yet; if a previous value already exists, it is kept.
https://github.com/llvm/llvm-project/pull/146997
More information about the llvm-commits
mailing list