[llvm] [AMDGPU] Generate waterfall for calls with SGPR(inreg) argument (PR #146997)
Juan Manuel Martinez Caamaño via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 12 01:16:29 PDT 2025
================
@@ -7581,6 +7607,94 @@ void SIInstrInfo::moveToVALU(SIInstrWorklist &Worklist,
assert(Worklist.empty() &&
"Deferred MachineInstr are not supposed to re-populate worklist");
}
+
+ for (std::pair<MachineInstr *, V2PhysSCopyInfo> &Entry : Worklist.WaterFalls)
+ createWaterFall(Entry.first, MDT, Entry.second.MOs, Entry.second.SGPRs);
+
+ for (std::pair<MachineInstr *, bool> Entry : Worklist.V2PhySCopiesToErase)
+ if (Entry.second)
+ Entry.first->eraseFromParent();
+}
+void SIInstrInfo::getReadFirstLaneFromCopyToM0(MachineRegisterInfo &MRI,
+ Register DstReg,
+ MachineInstr &Inst) const { // NOTE(review): the body never checks that DstReg is M0, despite the name.
+ // Copy of a VGPR to a physical SGPR: rebuild DstReg from V_READFIRSTLANE_B32 of
+ // Inst's operand 1 (one per 32-bit channel when wider) and hope for the best.
+ if (MRI.constrainRegClass(DstReg, &AMDGPU::SReg_32_XM0RegClass)) { // Constraint succeeded: write DstReg directly.
+ BuildMI(*Inst.getParent(), &Inst, Inst.getDebugLoc(),
+ get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
+ .add(Inst.getOperand(1));
+ } else { // Constraint failed: go through fresh SReg_32_XM0 virtual registers.
+ unsigned RegSize = RI.getRegSizeInBits(DstReg, MRI);
+ unsigned NumSubRegs = RegSize / 32; // Number of 32-bit channels in DstReg.
+ if (NumSubRegs == 1) { // Single channel: readfirstlane into a temporary, then COPY it to DstReg.
+ Register NewDst = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+ BuildMI(*Inst.getParent(), &Inst, Inst.getDebugLoc(),
+ get(AMDGPU::V_READFIRSTLANE_B32), NewDst)
+ .add(Inst.getOperand(1));
+ BuildMI(*Inst.getParent(), &Inst, Inst.getDebugLoc(), get(AMDGPU::COPY),
+ DstReg)
+ .addReg(NewDst);
+ } else { // Several channels: readfirstlane each one, then REG_SEQUENCE them into DstReg.
+ SmallVector<Register, 8> DstRegs;
+ for (unsigned i = 0; i < NumSubRegs; ++i) {
+ Register NewDst =
+ MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+ BuildMI(*Inst.getParent(), &Inst, Inst.getDebugLoc(),
+ get(AMDGPU::V_READFIRSTLANE_B32), NewDst)
+ .addReg(Inst.getOperand(1).getReg(), 0, RI.getSubRegFromChannel(i)); // Channel i of the source register.
+ DstRegs.push_back(NewDst);
+ }
+ MachineInstrBuilder MIB =
+ BuildMI(*Inst.getParent(), &Inst, Inst.getDebugLoc(),
+ get(AMDGPU::REG_SEQUENCE), DstReg);
+ for (unsigned i = 0; i < NumSubRegs; ++i) { // REG_SEQUENCE operands come in (register, subreg index) pairs.
+ MIB.addReg(DstRegs[i]);
+ MIB.addImm(RI.getSubRegFromChannel(i));
+ }
+ }
+ }
+}
+
+void SIInstrInfo::handleCopyToPhyHelper(SIInstrWorklist &Worklist,
+ Register DstReg, MachineInstr &Inst,
+ MachineRegisterInfo &MRI) const {
+ if (DstReg == AMDGPU::M0) {
+ getReadFirstLaneFromCopyToM0(MRI, DstReg, Inst);
+ Worklist.V2PhySCopiesToErase.try_emplace(&Inst, true);
+ return;
+ }
+ Register SrcReg = Inst.getOperand(1).getReg();
+ MachineBasicBlock::iterator I = Inst.getIterator();
+ MachineBasicBlock::iterator E = Inst.getParent()->end();
+ // Only search current block since phyreg's def & use cannot cross
+ // blocks when MF.NoPhi = false.
+ while (++I != E) {
+ // Currently, we only support waterfall on SI_CALL_ISEL.
+ if (I->getOpcode() == AMDGPU::SI_CALL_ISEL) {
+ MachineInstr *UseMI = &*I;
+ for (unsigned i = 0; i < UseMI->getNumOperands(); ++i) {
+ if (UseMI->getOperand(i).isReg() &&
+ UseMI->getOperand(i).getReg() == DstReg) {
+ MachineOperand *MO = &UseMI->getOperand(i);
+ MO->setReg(SrcReg);
+ V2PhysSCopyInfo &V2SCopyInfo = Worklist.WaterFalls[UseMI];
+ V2SCopyInfo.MOs.push_back(MO);
+ V2SCopyInfo.SGPRs.push_back(DstReg);
+ Worklist.V2PhySCopiesToErase.try_emplace(&Inst, true);
+ }
+ }
+ } else if (I->getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG &&
+ I->getOperand(0).isReg() &&
+ I->getOperand(0).getReg() == DstReg) {
+ getReadFirstLaneFromCopyToM0(MRI, DstReg, Inst);
----------------
jmmartinez wrote:
I see that this function is named `getReadFirstLaneFromCopyToM0`, but after a quick look it seems that it can also handle other copies to physical registers; am I right?
https://github.com/llvm/llvm-project/pull/146997
More information about the llvm-commits
mailing list