[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: AMDGPURegBankSelect (PR #112863)

Petar Avramovic via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Thu Oct 31 06:22:54 PDT 2024


================
@@ -66,9 +73,215 @@ FunctionPass *llvm::createAMDGPURegBankSelectPass() {
   return new AMDGPURegBankSelect();
 }
 
+/// Helper that assigns register banks (sgpr, vgpr or vcc) to virtual registers
+/// defined by pre-isel generic instructions and copies, and inserts copies so
+/// that register uses that do not yet have a register bank get one. The bank
+/// is chosen from machine uniformity info and the intrinsic lane mask analysis.
+class RegBankSelectHelper {
+  MachineIRBuilder &B;
+  MachineRegisterInfo &MRI;
+  AMDGPU::IntrinsicLaneMaskAnalyzer &ILMA;
+  const MachineUniformityInfo &MUI;
+  const SIRegisterInfo &TRI;
+  const RegisterBank *SgprRB;
+  const RegisterBank *VgprRB;
+  const RegisterBank *VccRB;
+
+public:
+  /// Caches the sgpr, vgpr and vcc register banks from \p RBI and takes the
+  /// MachineRegisterInfo from the builder \p B.
+  RegBankSelectHelper(MachineIRBuilder &B,
+                      AMDGPU::IntrinsicLaneMaskAnalyzer &ILMA,
+                      const MachineUniformityInfo &MUI,
+                      const SIRegisterInfo &TRI, const RegisterBankInfo &RBI)
+      : B(B), MRI(*B.getMRI()), ILMA(ILMA), MUI(MUI), TRI(TRI),
+        SgprRB(&RBI.getRegBank(AMDGPU::SGPRRegBankID)),
+        VgprRB(&RBI.getRegBank(AMDGPU::VGPRRegBankID)),
+        VccRB(&RBI.getRegBank(AMDGPU::VCCRegBankID)) {}
+
+  /// Returns true if \p MI is handled by this helper: a pre-isel (generic)
+  /// opcode or a COPY.
+  bool shouldRegBankSelect(MachineInstr &MI) {
+    return MI.isPreISelOpcode() || MI.isCopy();
+  }
+
+  // Temporal divergence copy: COPY to vgpr with implicit use of $exec inside of
+  // the cycle
+  // Note: uniformity analysis does not consider that registers with vgpr def
+  // are divergent (you can have uniform value in vgpr).
+  // - TODO: implicit use of $exec could be implemented as indicator that
+  //   instruction is divergent
+  /// Returns true if the instruction defining \p Reg is a COPY that has the
+  /// exec register among its implicit operands.
+  bool isTemporalDivergenceCopy(Register Reg) {
+    MachineInstr *MI = MRI.getVRegDef(Reg);
+    if (!MI->isCopy())
+      return false;
+
+    // Bind by reference; there is no need to copy each MachineOperand.
+    for (const MachineOperand &Op : MI->implicit_operands()) {
+      if (Op.isReg() && Op.getReg() == TRI.getExec())
+        return true;
+    }
+
+    return false;
+  }
+
+  /// Gives the register defined by \p DefOP (an operand of \p MI) the register
+  /// bank \p RB. If the register already has a register class, \p MI is
+  /// changed to define a new register with bank \p RB instead, and a copy from
+  /// the new register to the original one is inserted after \p MI.
+  void setRBDef(MachineInstr &MI, MachineOperand &DefOP,
+                const RegisterBank *RB) {
+    Register Reg = DefOP.getReg();
+    // A register that already has a register class got it during pre-inst
+    // selection of another instruction. Maybe a cross bank copy was required,
+    // so we insert a copy that can be removed later. This simplifies the post
+    // regbanklegalize combiner and avoids the need to special case some
+    // patterns.
+    if (MRI.getRegClassOrNull(Reg)) {
+      LLT Ty = MRI.getType(Reg);
+      Register NewReg = MRI.createVirtualRegister({RB, Ty});
+      DefOP.setReg(NewReg);
+
+      // Insert the copy right after MI, skipping PHIs and labels.
+      auto &MBB = *MI.getParent();
+      B.setInsertPt(MBB, MBB.SkipPHIsAndLabels(std::next(MI.getIterator())));
+      B.buildCopy(Reg, NewReg);
+
+      // The problem was discovered for uniform S1 that was used as both
+      // lane mask(vcc) and regular sgpr S1.
+      // - lane-mask(vcc) use was by si_if, this use is divergent and requires
+      //   non-trivial sgpr-S1-to-vcc copy. But pre-inst-selection of si_if sets
+      //   sreg_64_xexec(S1) on def of uniform S1 making it lane-mask.
+      // - the regular sgpr S1(uniform) instruction is now broken since
+      //   it uses sreg_64_xexec(S1) which is divergent.
+
+      // Replace virtual registers with register class on generic instructions
+      // uses with virtual registers with register bank.
+      // Op.setReg() moves the operand from the use list of Reg to the use list
+      // of NewReg, which would invalidate a plain iterator over the uses of
+      // Reg. Advance the iterator before rewriting the current instruction.
+      for (auto &UseMI : llvm::make_early_inc_range(MRI.use_instructions(Reg))) {
+        if (!shouldRegBankSelect(UseMI))
+          continue;
+
+        for (MachineOperand &Op : UseMI.operands()) {
+          if (Op.isReg() && Op.getReg() == Reg)
+            Op.setReg(NewReg);
+        }
+      }
+
+    } else {
+      MRI.setRegBank(Reg, *RB);
+    }
+  }
+
+  /// Returns the register of \p Op if \p Op is a register operand holding a
+  /// virtual register, std::nullopt otherwise.
+  std::optional<Register> tryGetVReg(MachineOperand &Op) {
+    if (!Op.isReg())
+      return std::nullopt;
+
+    Register Reg = Op.getReg();
+    if (!Reg.isVirtual())
+      return std::nullopt;
+
+    return Reg;
+  }
+
+  /// Assigns a register bank to every virtual register defined by \p MI:
+  /// sgpr when the register is uniform or an S32/S64 lane mask, otherwise vcc
+  /// for S1 and vgpr for any other type. Does nothing for instructions
+  /// rejected by shouldRegBankSelect.
+  void assignBanksOnDefs(MachineInstr &MI) {
+    if (!shouldRegBankSelect(MI))
+      return;
+
+    for (MachineOperand &DefOP : MI.defs()) {
+      auto MaybeDefReg = tryGetVReg(DefOP);
+      if (!MaybeDefReg)
+        continue;
+      Register DefReg = *MaybeDefReg;
+
+      // Copies can have register class on def registers.
+      if (MI.isCopy() && MRI.getRegClassOrNull(DefReg)) {
+        continue;
+      }
+
+      if (MUI.isUniform(DefReg) || ILMA.isS32S64LaneMask(DefReg)) {
+        setRBDef(MI, DefOP, SgprRB);
+      } else {
+        if (MRI.getType(DefReg) == LLT::scalar(1))
+          setRBDef(MI, DefOP, VccRB);
+        else
+          setRBDef(MI, DefOP, VgprRB);
+      }
+    }
+  }
+
+  /// Makes \p UseOP (an operand of \p MI) use a new virtual register with bank
+  /// \p RB that is defined by a copy from the originally used register. For a
+  /// PHI the copy is inserted after the instruction defining the original
+  /// register (skipping PHIs and labels), otherwise right before \p MI.
+  void constrainRBUse(MachineInstr &MI, MachineOperand &UseOP,
+                      const RegisterBank *RB) {
+    Register Reg = UseOP.getReg();
+
+    LLT Ty = MRI.getType(Reg);
+    Register NewReg = MRI.createVirtualRegister({RB, Ty});
+    UseOP.setReg(NewReg);
+
+    if (MI.isPHI()) {
+      auto DefMI = MRI.getVRegDef(Reg)->getIterator();
+      MachineBasicBlock *DefMBB = DefMI->getParent();
+      B.setInsertPt(*DefMBB, DefMBB->SkipPHIsAndLabels(std::next(DefMI)));
+    } else {
+      B.setInstr(MI);
+    }
+
+    B.buildCopy(NewReg, Reg);
+  }
+
+  /// For every virtual register used by \p MI that has no register bank yet,
+  /// inserts a copy into a new register with a bank and uses that instead:
+  /// sgpr when the register is not defined by a temporal divergence copy and
+  /// is uniform or an S32/S64 lane mask, otherwise vcc for S1 and vgpr for any
+  /// other type. Copies and instructions rejected by shouldRegBankSelect are
+  /// left untouched.
+  void constrainBanksOnUses(MachineInstr &MI) {
+    if (!shouldRegBankSelect(MI))
+      return;
+
+    // Copies can have register class on use registers.
+    if (MI.isCopy())
+      return;
+
+    for (MachineOperand &UseOP : MI.uses()) {
+      auto MaybeUseReg = tryGetVReg(UseOP);
+      if (!MaybeUseReg)
+        continue;
+      Register UseReg = *MaybeUseReg;
+
+      // UseReg already has register bank.
+      if (MRI.getRegBankOrNull(UseReg))
+        continue;
+
+      if (!isTemporalDivergenceCopy(UseReg) &&
+          (MUI.isUniform(UseReg) || ILMA.isS32S64LaneMask(UseReg))) {
+        constrainRBUse(MI, UseOP, SgprRB);
+      } else {
+        if (MRI.getType(UseReg) == LLT::scalar(1))
+          constrainRBUse(MI, UseOP, VccRB);
+        else
+          constrainRBUse(MI, UseOP, VgprRB);
+      }
+    }
+  }
+};
+
 bool AMDGPURegBankSelect::runOnMachineFunction(MachineFunction &MF) {
   if (MF.getProperties().hasProperty(
           MachineFunctionProperties::Property::FailedISel))
     return false;
+
+  MachineIRBuilder B(MF);
----------------
petar-avramovic wrote:

Added, but I don't see a reason for it. Here we only set register banks and insert a few copies, so there is nothing for CSE to find. The legalizer part, by contrast, actually makes use of CSE often.

https://github.com/llvm/llvm-project/pull/112863


More information about the llvm-branch-commits mailing list