[llvm] AMDGPU/GlobalISelDivergenceLowering: select divergent i1 phis (PR #78482)
Pierre van Houtryve via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 22 05:35:14 PST 2024
================
@@ -42,14 +45,152 @@ class AMDGPUGlobalISelDivergenceLowering : public MachineFunctionPass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<MachinePostDominatorTree>();
+ AU.addRequired<MachineUniformityAnalysisPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}
};
+class DivergenceLoweringHelper : public PhiLoweringHelper {
+public:
+ DivergenceLoweringHelper(MachineFunction *MF, MachineDominatorTree *DT,
+ MachinePostDominatorTree *PDT,
+ MachineUniformityInfo *MUI);
+
+private:
+ MachineUniformityInfo *MUI = nullptr;
+
+public:
+ void markAsLaneMask(Register DstReg) const override;
+ void getCandidatesForLowering(
+ SmallVectorImpl<MachineInstr *> &Vreg1Phis) const override;
+ void collectIncomingValuesFromPhi(
+ const MachineInstr *MI,
+ SmallVectorImpl<Incoming> &Incomings) const override;
+ void replaceDstReg(Register NewReg, Register OldReg,
+ MachineBasicBlock *MBB) override;
+ void buildMergeLaneMasks(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, const DebugLoc &DL,
+ Register DstReg, Register PrevReg,
+ Register CurReg) override;
+ void constrainAsLaneMask(Incoming &In) override;
+};
+
+DivergenceLoweringHelper::DivergenceLoweringHelper(
+ MachineFunction *MF, MachineDominatorTree *DT,
+ MachinePostDominatorTree *PDT, MachineUniformityInfo *MUI)
+ : PhiLoweringHelper(MF, DT, PDT), MUI(MUI) {}
+
+// _(s1) -> SReg_32/64(s1)
+void DivergenceLoweringHelper::markAsLaneMask(Register DstReg) const {
+ assert(MRI->getType(DstReg) == LLT::scalar(1));
+
+ if (MRI->getRegClassOrNull(DstReg)) {
+ MRI->constrainRegClass(DstReg, ST->getBoolRC());
+ return;
+ }
+
+ MRI->setRegClass(DstReg, ST->getBoolRC());
+}
+
+void DivergenceLoweringHelper::getCandidatesForLowering(
+ SmallVectorImpl<MachineInstr *> &Vreg1Phis) const {
+ LLT S1 = LLT::scalar(1);
+
+ // Add divergent i1 phis to the list
+ for (MachineBasicBlock &MBB : *MF) {
+ for (MachineInstr &MI : MBB.phis()) {
+ Register Dst = MI.getOperand(0).getReg();
+ if (MRI->getType(Dst) == S1 && MUI->isDivergent(Dst))
+ Vreg1Phis.push_back(&MI);
+ }
+ }
+}
+
+void DivergenceLoweringHelper::collectIncomingValuesFromPhi(
+ const MachineInstr *MI, SmallVectorImpl<Incoming> &Incomings) const {
+ for (unsigned i = 1; i < MI->getNumOperands(); i += 2) {
+ Incomings.emplace_back(MI->getOperand(i).getReg(),
+ MI->getOperand(i + 1).getMBB(), Register());
+ }
+}
+
+void DivergenceLoweringHelper::replaceDstReg(Register NewReg, Register OldReg,
+ MachineBasicBlock *MBB) {
+ BuildMI(*MBB, MBB->getFirstNonPHI(), {}, TII->get(AMDGPU::COPY), OldReg)
+ .addReg(NewReg);
+}
+
+// Get pointers to build instruction just after MI (skips phis if needed)
+static std::pair<MachineBasicBlock *, MachineBasicBlock::iterator>
+getInsertAfterPtrs(MachineInstr *MI) {
+ MachineBasicBlock *InsertMBB = MI->getParent();
+ return {InsertMBB,
+ InsertMBB->SkipPHIsAndLabels(std::next(MI->getIterator()))};
+}
+
+// bb.previous
+// %PrevReg = ...
+//
+// bb.current
+// %CurReg = ...
+//
+// %DstReg - not defined
+//
+// -> (wave32 example, new registers have sreg_32 reg class and S1 LLT)
+//
+// bb.previous
+// %PrevReg = ...
+// %PrevRegCopy:sreg_32(s1) = COPY %PrevReg
+//
+// bb.current
+// %CurReg = ...
+// %CurRegCopy:sreg_32(s1) = COPY %CurReg
+// ...
+// %PrevMaskedReg:sreg_32(s1) = ANDN2 %PrevRegCopy, ExecReg - active lanes 0
+// %CurMaskedReg:sreg_32(s1) = AND %ExecReg, CurRegCopy - inactive lanes to 0
+// %DstReg:sreg_32(s1) = OR %PrevMaskedReg, CurMaskedReg
+//
+// DstReg = for active lanes rewrite bit in PrevReg with bit from CurReg
----------------
Pierre-vh wrote:
Indent the code in the BBs so it's easier to follow
```suggestion
// bb.previous
//   %PrevReg = ...
//
// bb.current
//   %CurReg = ...
//
//   %DstReg - not defined
//
// -> (wave32 example, new registers have sreg_32 reg class and S1 LLT)
//
// bb.previous
//   %PrevReg = ...
//   %PrevRegCopy:sreg_32(s1) = COPY %PrevReg
//
// bb.current
//   %CurReg = ...
//   %CurRegCopy:sreg_32(s1) = COPY %CurReg
//   ...
//   %PrevMaskedReg:sreg_32(s1) = ANDN2 %PrevRegCopy, ExecReg - active lanes 0
//   %CurMaskedReg:sreg_32(s1) = AND %ExecReg, CurRegCopy - inactive lanes to 0
//   %DstReg:sreg_32(s1) = OR %PrevMaskedReg, CurMaskedReg
//
// DstReg = for active lanes rewrite bit in PrevReg with bit from CurReg
```
https://github.com/llvm/llvm-project/pull/78482
More information about the llvm-commits
mailing list