[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: AMDGPURegBankSelect (PR #112863)
Petar Avramovic via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Thu Oct 31 06:22:54 PDT 2024
================
@@ -66,9 +73,215 @@ FunctionPass *llvm::createAMDGPURegBankSelectPass() {
return new AMDGPURegBankSelect();
}
+/// Helper used by AMDGPURegBankSelect to put register banks on virtual
+/// registers. Defs of generic instructions and copies get an sgpr, vgpr or vcc
+/// bank; uses of registers that do not have a bank yet are rewritten to read a
+/// freshly created register with a bank, fed by a COPY from the original.
+class RegBankSelectHelper {
+  MachineIRBuilder &B;
+  MachineRegisterInfo &MRI;
+  AMDGPU::IntrinsicLaneMaskAnalyzer &ILMA;
+  const MachineUniformityInfo &MUI;
+  const SIRegisterInfo &TRI;
+  const RegisterBank *SgprRB;
+  const RegisterBank *VgprRB;
+  const RegisterBank *VccRB;
+
+  /// Bank used for a register that is not assigned to sgpr: S1 values go to
+  /// vcc, every other type goes to vgpr.
+  const RegisterBank *getNonSgprBank(Register Reg) const {
+    return MRI.getType(Reg) == LLT::scalar(1) ? VccRB : VgprRB;
+  }
+
+public:
+  RegBankSelectHelper(MachineIRBuilder &B,
+                      AMDGPU::IntrinsicLaneMaskAnalyzer &ILMA,
+                      const MachineUniformityInfo &MUI,
+                      const SIRegisterInfo &TRI, const RegisterBankInfo &RBI)
+      : B(B), MRI(*B.getMRI()), ILMA(ILMA), MUI(MUI), TRI(TRI),
+        SgprRB(&RBI.getRegBank(AMDGPU::SGPRRegBankID)),
+        VgprRB(&RBI.getRegBank(AMDGPU::VGPRRegBankID)),
+        VccRB(&RBI.getRegBank(AMDGPU::VCCRegBankID)) {}
+
+  /// Only generic (pre-isel) instructions and COPYs are processed by this
+  /// helper.
+  bool shouldRegBankSelect(MachineInstr &MI) {
+    return MI.isPreISelOpcode() || MI.isCopy();
+  }
+
+  // Temporal divergence copy: COPY to vgpr with implicit use of $exec inside of
+  // the cycle.
+  // Note: uniformity analysis does not consider that registers with vgpr def
+  // are divergent (you can have uniform value in vgpr).
+  // - TODO: implicit use of $exec could be implemented as indicator that
+  //   instruction is divergent
+  //
+  // Returns true when the instruction defining Reg is a COPY that has $exec
+  // among its implicit operands. Returns false when Reg has no defining
+  // instruction.
+  bool isTemporalDivergenceCopy(Register Reg) {
+    MachineInstr *MI = MRI.getVRegDef(Reg);
+    if (!MI || !MI->isCopy())
+      return false;
+
+    // Bind by reference; MachineOperand does not need to be copied here.
+    for (const MachineOperand &Op : MI->implicit_operands()) {
+      if (Op.isReg() && Op.getReg() == TRI.getExec())
+        return true;
+    }
+
+    return false;
+  }
+
+  /// Assign bank \p RB to the register defined by \p DefOP of \p MI.
+  ///
+  /// If the register has no register class, the bank is set on it directly.
+  /// Otherwise \p DefOP is redirected to a new register with bank \p RB, a
+  /// COPY from the new register to the original one is inserted after \p MI
+  /// (past PHIs and labels), and uses of the original register in
+  /// instructions accepted by shouldRegBankSelect() are redirected to the new
+  /// register.
+  void setRBDef(MachineInstr &MI, MachineOperand &DefOP,
+                const RegisterBank *RB) {
+    Register Reg = DefOP.getReg();
+    if (!MRI.getRegClassOrNull(Reg)) {
+      MRI.setRegBank(Reg, *RB);
+      return;
+    }
+
+    // Register that already has Register class got it during pre-inst
+    // selection of another instruction. Maybe cross bank copy was required so
+    // we insert a copy that can be removed later. This simplifies post
+    // regbanklegalize combiner and avoids need to special case some patterns.
+    LLT Ty = MRI.getType(Reg);
+    Register NewReg = MRI.createVirtualRegister({RB, Ty});
+    DefOP.setReg(NewReg);
+
+    auto &MBB = *MI.getParent();
+    B.setInsertPt(MBB, MBB.SkipPHIsAndLabels(std::next(MI.getIterator())));
+    B.buildCopy(Reg, NewReg);
+
+    // The problem was discovered for uniform S1 that was used as both
+    // lane mask(vcc) and regular sgpr S1.
+    // - lane-mask(vcc) use was by si_if, this use is divergent and requires
+    //   non-trivial sgpr-S1-to-vcc copy. But pre-inst-selection of si_if sets
+    //   sreg_64_xexec(S1) on def of uniform S1 making it lane-mask.
+    // - the regular sgpr S1(uniform) instruction is now broken since
+    //   it uses sreg_64_xexec(S1) which is divergent.
+
+    // Replace virtual registers with register class on generic instructions
+    // uses with virtual registers with register bank.
+    // setReg() moves the operand from Reg's use list to NewReg's use list, so
+    // the iterator has to be advanced before the operand is rewritten;
+    // otherwise the walk would continue along the wrong list.
+    for (auto It = MRI.use_begin(Reg), End = MRI.use_end(); It != End;) {
+      MachineOperand &UseOp = *It++;
+      if (shouldRegBankSelect(*UseOp.getParent()))
+        UseOp.setReg(NewReg);
+    }
+  }
+
+  /// Returns the register of \p Op when it is a virtual register operand, and
+  /// std::nullopt otherwise.
+  std::optional<Register> tryGetVReg(MachineOperand &Op) {
+    if (!Op.isReg())
+      return std::nullopt;
+
+    Register Reg = Op.getReg();
+    if (!Reg.isVirtual())
+      return std::nullopt;
+
+    return Reg;
+  }
+
+  /// Put a register bank on every virtual register defined by \p MI: sgpr for
+  /// registers that are uniform or reported as S32/S64 lane masks, vcc for the
+  /// remaining S1 registers and vgpr for everything else.
+  void assignBanksOnDefs(MachineInstr &MI) {
+    if (!shouldRegBankSelect(MI))
+      return;
+
+    for (MachineOperand &DefOP : MI.defs()) {
+      auto MaybeDefReg = tryGetVReg(DefOP);
+      if (!MaybeDefReg)
+        continue;
+      Register DefReg = *MaybeDefReg;
+
+      // Copies can have register class on def registers.
+      if (MI.isCopy() && MRI.getRegClassOrNull(DefReg))
+        continue;
+
+      if (MUI.isUniform(DefReg) || ILMA.isS32S64LaneMask(DefReg))
+        setRBDef(MI, DefOP, SgprRB);
+      else
+        setRBDef(MI, DefOP, getNonSgprBank(DefReg));
+    }
+  }
+
+  /// Make \p UseOP of \p MI read a new register with bank \p RB and insert a
+  /// COPY from the original register into it. For PHIs the COPY is placed
+  /// after the instruction defining the original register (past PHIs and
+  /// labels); for all other instructions it is placed right before \p MI.
+  void constrainRBUse(MachineInstr &MI, MachineOperand &UseOP,
+                      const RegisterBank *RB) {
+    Register Reg = UseOP.getReg();
+
+    LLT Ty = MRI.getType(Reg);
+    Register NewReg = MRI.createVirtualRegister({RB, Ty});
+    UseOP.setReg(NewReg);
+
+    if (MI.isPHI()) {
+      auto DefMI = MRI.getVRegDef(Reg)->getIterator();
+      MachineBasicBlock *DefMBB = DefMI->getParent();
+      B.setInsertPt(*DefMBB, DefMBB->SkipPHIsAndLabels(std::next(DefMI)));
+    } else {
+      B.setInstr(MI);
+    }
+
+    B.buildCopy(NewReg, Reg);
+  }
+
+  /// For every virtual register used by the generic instruction \p MI that
+  /// has no register bank yet, insert a copy into a register with a bank (see
+  /// constrainRBUse). COPY instructions are left untouched.
+  void constrainBanksOnUses(MachineInstr &MI) {
+    if (!shouldRegBankSelect(MI))
+      return;
+
+    // Copies can have register class on use registers.
+    if (MI.isCopy())
+      return;
+
+    for (MachineOperand &UseOP : MI.uses()) {
+      auto MaybeUseReg = tryGetVReg(UseOP);
+      if (!MaybeUseReg)
+        continue;
+      Register UseReg = *MaybeUseReg;
+
+      // UseReg already has register bank.
+      if (MRI.getRegBankOrNull(UseReg))
+        continue;
+
+      // A temporal divergence copy is never treated as sgpr, even if the
+      // uniformity analysis reports its result as uniform.
+      bool UseSgpr =
+          !isTemporalDivergenceCopy(UseReg) &&
+          (MUI.isUniform(UseReg) || ILMA.isS32S64LaneMask(UseReg));
+      constrainRBUse(MI, UseOP, UseSgpr ? SgprRB : getNonSgprBank(UseReg));
+    }
+  }
+};
+
bool AMDGPURegBankSelect::runOnMachineFunction(MachineFunction &MF) {
if (MF.getProperties().hasProperty(
MachineFunctionProperties::Property::FailedISel))
return false;
+
+ MachineIRBuilder B(MF);
----------------
petar-avramovic wrote:
Added, but I don't see a reason for it. Here we only set register banks and insert a few copies, so there is nothing for CSE to find. The legalizer part, on the other hand, actually makes use of CSE often.
https://github.com/llvm/llvm-project/pull/112863
More information about the llvm-branch-commits
mailing list