[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: RBSelect (PR #112863)
Thorsten Schütt via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Sat Oct 19 11:45:19 PDT 2024
================
@@ -63,4 +70,189 @@ char &llvm::AMDGPURBSelectID = AMDGPURBSelect::ID;
FunctionPass *llvm::createAMDGPURBSelectPass() { return new AMDGPURBSelect(); }
-bool AMDGPURBSelect::runOnMachineFunction(MachineFunction &MF) { return true; }
+bool shouldRBSelect(MachineInstr &MI) {
+ if (isTargetSpecificOpcode(MI.getOpcode()) && !MI.isPreISelOpcode())
+ return false;
+
+ if (MI.getOpcode() == AMDGPU::PHI || MI.getOpcode() == AMDGPU::IMPLICIT_DEF)
+ return false;
+
+ if (MI.isInlineAsm())
+ return false;
+
+ return true;
+}
+
+void setRB(MachineInstr &MI, MachineOperand &DefOP, MachineIRBuilder B,
+ MachineRegisterInfo &MRI, const RegisterBank &RB) {
+ Register Reg = DefOP.getReg();
+  // A register that already has a register class got it during pre-inst
+  // selection of another instruction. A cross-bank copy may have been
+  // required, so we insert a copy that can be removed later. This simplifies
+  // the post-rb-legalize artifact combiner and avoids the need to
+  // special-case some patterns.
+ if (MRI.getRegClassOrNull(Reg)) {
+ LLT Ty = MRI.getType(Reg);
+ Register NewReg = MRI.createVirtualRegister({&RB, Ty});
+ DefOP.setReg(NewReg);
+
+ auto &MBB = *MI.getParent();
+ B.setInsertPt(MBB, MI.isPHI() ? MBB.getFirstNonPHI()
+ : std::next(MI.getIterator()));
+ B.buildCopy(Reg, NewReg);
+
+    // The problem was discovered for a uniform S1 that was used as both a
+    // lane mask (vcc) and a regular sgpr S1.
+    // - The lane-mask (vcc) use was by si_if; this use is divergent and
+    //   requires a non-trivial sgpr-S1-to-vcc copy. But pre-inst-selection of
+    //   si_if sets sreg_64_xexec(S1) on the def of the uniform S1, making it
+    //   a lane mask.
+    // - The regular sgpr S1 (uniform) instruction is now broken, since it
+    //   uses sreg_64_xexec(S1), which is divergent.
+
+ // "Clear" reg classes from uses on generic instructions and but register
+ // banks instead.
+ for (auto &UseMI : MRI.use_instructions(Reg)) {
+ if (shouldRBSelect(UseMI)) {
+ for (MachineOperand &Op : UseMI.operands()) {
+ if (Op.isReg() && Op.isUse() && Op.getReg() == Reg)
+ Op.setReg(NewReg);
+ }
+ }
+ }
+
+ } else {
+ MRI.setRegBank(Reg, RB);
+ }
+}
+
+void setRBUse(MachineInstr &MI, MachineOperand &UseOP, MachineIRBuilder B,
+ MachineRegisterInfo &MRI, const RegisterBank &RB) {
+ Register Reg = UseOP.getReg();
+
+ LLT Ty = MRI.getType(Reg);
+ Register NewReg = MRI.createVirtualRegister({&RB, Ty});
+ UseOP.setReg(NewReg);
+
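+  // A new copy for a PHI use cannot be inserted in front of the PHI itself,
+  // so place it in the def's block instead, right after the def.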
+ if (MI.isPHI()) {
+ auto DefMI = MRI.getVRegDef(Reg)->getIterator();
+ MachineBasicBlock *DefMBB = DefMI->getParent();
+ B.setInsertPt(*DefMBB, DefMBB->SkipPHIsAndLabels(std::next(DefMI)));
+ } else {
+ B.setInstr(MI);
+ }
+
+ B.buildCopy(NewReg, Reg);
+}
+
+// Temporal divergence copy: COPY to vgpr with an implicit use of $exec inside
+// the cycle.
+// Note: uniformity analysis does not consider registers with a vgpr def to be
+// divergent (a vgpr can hold a uniform value).
+// - TODO: an implicit use of $exec could be used as an indicator that the
+//   instruction is divergent.
+bool isTemporalDivergenceCopy(Register Reg, MachineRegisterInfo &MRI) {
+ MachineInstr *MI = MRI.getVRegDef(Reg);
+ if (MI->getOpcode() == AMDGPU::COPY) {
+    for (const MachineOperand &Op : MI->implicit_operands()) {
+      if (!Op.isReg())
+        continue;
+      if (Op.getReg() == AMDGPU::EXEC)
+        return true;
+    }
+ }
+
+ return false;
+}
+
+Register getVReg(MachineOperand &Op) {
+ if (!Op.isReg())
+ return 0;
+
+ Register Reg = Op.getReg();
+ if (!Reg.isVirtual())
+ return 0;
+
+ return Reg;
+}
+
+bool AMDGPURBSelect::runOnMachineFunction(MachineFunction &MF) {
+ MachineUniformityInfo &MUI =
+ getAnalysis<MachineUniformityAnalysisPass>().getUniformityInfo();
+ AMDGPU::IntrinsicLaneMaskAnalyzer ILMA(MF);
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const RegisterBankInfo &RBI = *MF.getSubtarget().getRegBankInfo();
+
+ MachineIRBuilder B(MF);
+
+ // Assign register banks to ALL def registers on G_ instructions.
+ // Same for copies if they have no register bank or class on def.
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+ if (!shouldRBSelect(MI))
+ continue;
+
+ for (MachineOperand &DefOP : MI.defs()) {
+ Register DefReg = getVReg(DefOP);
+ if (!DefReg)
+ continue;
+
+        // Copies can have a register class on their def registers.
+ if (MI.isCopy() && MRI.getRegClassOrNull(DefReg)) {
+ continue;
+ }
+
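+        // Uniform defs and S32/S64 lane masks get the SGPR bank. A divergent
+        // S1 def is a lane mask and gets the VCC bank; all other divergent
+        // defs get the VGPR bank.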
+ if (MUI.isUniform(DefReg) || ILMA.isS32S64LaneMask(DefReg)) {
+ setRB(MI, DefOP, B, MRI, RBI.getRegBank(AMDGPU::SGPRRegBankID));
+ } else {
+ if (MRI.getType(DefReg) == LLT::scalar(1))
+ setRB(MI, DefOP, B, MRI, RBI.getRegBank(AMDGPU::VCCRegBankID));
+ else
+ setRB(MI, DefOP, B, MRI, RBI.getRegBank(AMDGPU::VGPRRegBankID));
+ }
+ }
+ }
+ }
+
+  // At this point all virtual registers have a register class or bank:
+  // - Defs of G_ instructions have register banks.
+  // - Defs and uses of inst-selected instructions have register classes.
+  // - Defs and uses of copies can have either a register class or a bank,
+  //   and most notably
+  // - Uses of G_ instructions can have either a register class or a bank.
+
+ // Reassign uses of G_ instructions to only have register banks.
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+ if (!shouldRBSelect(MI))
+ continue;
+
+      // Copies can have a register class on their use registers.
+ if (MI.isCopy())
+ continue;
+
+ for (MachineOperand &UseOP : MI.uses()) {
+ Register UseReg = getVReg(UseOP);
----------------
tschuett wrote:
Please use std::optional.
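
For reference, a minimal sketch of what that could look like (hypothetical
rewrite, not part of the patch; assumes <optional> is available, as it is
throughout LLVM):

  std::optional<Register> getVReg(MachineOperand &Op) {
    if (!Op.isReg())
      return std::nullopt;

    Register Reg = Op.getReg();
    if (!Reg.isVirtual())
      return std::nullopt;

    return Reg;
  }

Callers would then test the optional instead of comparing against 0, e.g.
if (auto DefReg = getVReg(DefOP)), and use *DefReg afterwards.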
https://github.com/llvm/llvm-project/pull/112863