[llvm] GlobalISel lane masks merging (PR #73337)
Pierre van Houtryve via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 28 06:00:24 PST 2023
================
@@ -0,0 +1,213 @@
+//===-- AMDGPUGlobalISelDivergenceLowering.cpp ----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// GlobalISel pass that selects divergent i1 phis as lane mask phis.
+// Lane mask merging uses the same algorithm as SDAG in SILowerI1Copies.
+// Handles all cases of temporal divergence.
+//
+// For divergent non-phi i1 and uniform i1 uses outside of the cycle, this pass
+// currently depends on LCSSA to insert phis with a single incoming value.
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "SILowerI1Copies.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineUniformityAnalysis.h"
+#include "llvm/InitializePasses.h"
+
+#define DEBUG_TYPE "global-isel-divergence-lowering"
+
+using namespace llvm;
+
+namespace {
+
+/// MachineFunctionPass entry point for GlobalISel divergence lowering.
+///
+/// On construction the pass calls
+/// initializeAMDGPUGlobalISelDivergenceLoweringPass() with the global pass
+/// registry. The analyses it depends on are listed in getAnalysisUsage().
+class AMDGPUGlobalISelDivergenceLowering : public MachineFunctionPass {
+public:
+  static char ID;
+
+  AMDGPUGlobalISelDivergenceLowering() : MachineFunctionPass(ID) {
+    PassRegistry &Registry = *PassRegistry::getPassRegistry();
+    initializeAMDGPUGlobalISelDivergenceLoweringPass(Registry);
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  StringRef getPassName() const override {
+    return "GlobalISel divergence lowering";
+  }
+
+  /// Marks the CFG as preserved and requires machine cycle info plus the
+  /// machine dominator and post-dominator trees, then defers to the base
+  /// class for the remaining defaults.
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    AU.addRequired<MachineCycleInfoWrapperPass>();
+    AU.addRequired<MachineDominatorTree>();
+    AU.addRequired<MachinePostDominatorTree>();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+};
+
+/// PhiLoweringHelper implementation used by the GlobalISel divergence
+/// lowering pass. It overrides the helper hooks below and additionally keeps
+/// a pointer to the machine uniformity info.
+class DivergenceLoweringHelper : public PhiLoweringHelper {
+public:
+  DivergenceLoweringHelper(MachineFunction *MF, MachineDominatorTree *DT,
+                           MachinePostDominatorTree *PDT,
+                           MachineUniformityInfo *MUI);
+
+  // PhiLoweringHelper hooks.
+  void markAsLaneMask(Register DstReg) const override;
+  void getCandidatesForLowering(
+      SmallVectorImpl<MachineInstr *> &Vreg1Phis) const override;
+  void collectIncomingValuesFromPhi(
+      const MachineInstr *MI,
+      SmallVectorImpl<Incoming> &Incomings) const override;
+  void replaceDstReg(Register NewReg, Register OldReg,
+                     MachineBasicBlock *MBB) override;
+  void buildMergeLaneMasks(MachineBasicBlock &MBB,
+                           MachineBasicBlock::iterator I, const DebugLoc &DL,
+                           Register DstReg, Register PrevReg,
+                           Register CurReg) override;
+  void constrainIncomingRegisterTakenAsIs(Incoming &In) override;
+
+private:
+  // Uniformity info handed in through the constructor; queried by
+  // getCandidatesForLowering() to tell divergent phis from uniform ones.
+  MachineUniformityInfo *MUI = nullptr;
+};
+
+/// Forwards \p MF, \p DT and \p PDT to the PhiLoweringHelper base class and
+/// stores \p MUI in the MUI member. The constructor body itself is empty.
+DivergenceLoweringHelper::DivergenceLoweringHelper(
+ MachineFunction *MF, MachineDominatorTree *DT,
+ MachinePostDominatorTree *PDT, MachineUniformityInfo *MUI)
+ : PhiLoweringHelper(MF, DT, PDT), MUI(MUI) {}
+
+/// Assigns the subtarget's boolean register class (ST->getBoolRC()) to the
+/// s1 register \p DstReg, unless \p DstReg already has a register class.
+///
+/// _(s1) -> SReg_32/64(s1)
+void DivergenceLoweringHelper::markAsLaneMask(Register DstReg) const {
+  assert(MRI->getType(DstReg) == LLT::scalar(1) &&
+         "lane mask candidate must have type s1");
+
+  // Can't blindly set a register class on phi, users could have reg class
+  // constraints (e.g. sreg_32/64..._xexec classes for control flow intrinsics).
+  // So only assign the class when none has been set yet.
+  if (!MRI->getRegClassOrNull(DstReg))
+    MRI->setRegClass(DstReg, ST->getBoolRC());
+}
+
+/// Appends to \p Vreg1Phis every phi in the function whose result register
+/// has type s1 and is reported divergent by the uniformity info.
+///
+/// Blocks are visited in function order and phis in block order, so the
+/// output follows that order. Existing entries of \p Vreg1Phis are kept.
+void DivergenceLoweringHelper::getCandidatesForLowering(
+    SmallVectorImpl<MachineInstr *> &Vreg1Phis) const {
+  const LLT S1 = LLT::scalar(1);
+
+  for (MachineBasicBlock &MBB : *MF) {
+    for (MachineInstr &Phi : MBB.phis()) {
+      const Register Dst = Phi.getOperand(0).getReg();
+      // Skip anything that is not a divergent i1 phi; divergence is only
+      // queried once the type check has passed.
+      if (MRI->getType(Dst) != S1 || !MUI->isDivergent(Dst))
+        continue;
+      Vreg1Phis.push_back(&Phi);
+    }
+  }
+}
+
+/// Appends one Incoming entry to \p Incomings per incoming edge of the phi
+/// \p MI: the incoming register, its predecessor block, and a
+/// default-constructed Register as the third field.
+void DivergenceLoweringHelper::collectIncomingValuesFromPhi(
+    const MachineInstr *MI, SmallVectorImpl<Incoming> &Incomings) const {
+  // Operand 0 is skipped; the remaining operands are read in
+  // (register, block) pairs.
+  for (unsigned Idx = 1, End = MI->getNumOperands(); Idx < End; Idx += 2) {
+    const MachineOperand &Value = MI->getOperand(Idx);
+    const MachineOperand &Pred = MI->getOperand(Idx + 1);
+    Incomings.emplace_back(Value.getReg(), Pred.getMBB(), Register());
+  }
+}
+
+/// Defines \p OldReg as a COPY of \p NewReg. The copy is inserted at the
+/// first non-phi position of \p MBB with a default-constructed
+/// debug-location argument.
+void DivergenceLoweringHelper::replaceDstReg(Register NewReg, Register OldReg,
+                                             MachineBasicBlock *MBB) {
+  MachineBasicBlock::iterator InsertPt = MBB->getFirstNonPHI();
+  const MCInstrDesc &CopyDesc = TII->get(AMDGPU::COPY);
+  BuildMI(*MBB, InsertPt, {}, CopyDesc, OldReg).addReg(NewReg);
+}
+
+// Returns the block that contains MI together with an insertion point just
+// after MI. The point is advanced with SkipPHIsAndLabels, so it is never in
+// the middle of the block's phis or labels.
+static std::pair<MachineBasicBlock *, MachineBasicBlock::iterator>
+getInsertAfterPtrs(MachineInstr *MI) {
+  MachineBasicBlock *Parent = MI->getParent();
+  auto AfterMI = std::next(MI->getIterator());
+  return {Parent, Parent->SkipPHIsAndLabels(AfterMI)};
+}
+
+/// Emits the instructions that merge two lane masks into \p DstReg.
+///
+/// \p PrevReg and \p CurReg are each first copied into a fresh lane mask
+/// register right after their defining instruction. Three instructions are
+/// then inserted before \p I in \p MBB:
+///   PrevMasked = AndN2Op PrevCopy, ExecReg
+///   CurMasked  = AndOp   ExecReg, CurCopy
+///   DstReg     = OrOp    PrevMasked, CurMasked
+/// Going by the opcode names this presumably computes
+/// (PrevReg & ~EXEC) | (CurReg & EXEC).
+///
+/// All emitted instructions use \p DL as their debug location.
+void DivergenceLoweringHelper::buildMergeLaneMasks(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL,
+    Register DstReg, Register PrevReg, Register CurReg) {
+  // TODO: check if inputs are constants or results of a compare.
+
+  // Copies Reg into a new lane mask register; the COPY is placed just after
+  // Reg's defining instruction (past any phis/labels). Returns the new
+  // register.
+  auto CopyToLaneMask = [&](Register Reg) {
+    Register LaneMaskCopy = createLaneMaskReg(MRI, LaneMaskRegAttrs);
+    auto [DefMBB, AfterDef] = getInsertAfterPtrs(MRI->getVRegDef(Reg));
+    BuildMI(*DefMBB, AfterDef, DL, TII->get(AMDGPU::COPY), LaneMaskCopy)
+        .addReg(Reg);
+    return LaneMaskCopy;
+  };
+
+  // Register creation and instruction emission are kept in the same order
+  // as before so virtual register numbering does not change.
+  Register PrevRegCopy = CopyToLaneMask(PrevReg);
+  Register PrevMaskedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
+  BuildMI(MBB, I, DL, TII->get(AndN2Op), PrevMaskedReg)
+      .addReg(PrevRegCopy)
+      .addReg(ExecReg);
+
+  Register CurRegCopy = CopyToLaneMask(CurReg);
+  Register CurMaskedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
+  BuildMI(MBB, I, DL, TII->get(AndOp), CurMaskedReg)
+      .addReg(ExecReg)
+      .addReg(CurRegCopy);
+
+  BuildMI(MBB, I, DL, TII->get(OrOp), DstReg)
+      .addReg(PrevMaskedReg)
+      .addReg(CurMaskedReg);
+}
+
+// GlobalISel has to constrain S1 incoming taken as-is with lane mask register
+// class. Insert a copy of Incoming.Reg to new lane mask inside Incoming.Block,
+// Incoming.Reg becomes that new lane mask.
+void DivergenceLoweringHelper::constrainIncomingRegisterTakenAsIs(
+ Incoming &In) {
+ MachineIRBuilder B(*MF);
+ B.setInsertPt(*In.Block, In.Block->getFirstTerminator());
+
+ auto Copy = B.buildCopy(LLT::scalar(1), In.Reg);
+ MRI->setRegClass(Copy.getReg(0), ST->getBoolRC());
+ In.Reg = Copy.getReg(0);
+
+ return;
----------------
Pierre-vh wrote:
extra `return`
https://github.com/llvm/llvm-project/pull/73337
More information about the llvm-commits
mailing list