mir-canon simplified patch
Justin Bogner via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 31 02:15:38 PDT 2017
Puyan Lotfi <puyan.lotfi.llvm at gmail.com> writes:
> Thanks Justin!
>
> I have an updated patch, addressing the provided feedback.
Looking better. I have a few more nitpicks below, and this could
probably use a couple of simple tests for the basic functionality.
I also ran it on a few random examples in test/AArch64/GlobalISel and
noticed that it's a bit picky about exactly what kind of MIR it can
handle right now, but it seems fine to improve that part once the
initial patch goes in.
> commit 38551ee3aa2e3723fc4dec970e9229d9e0cdd807
> Author: Puyan Lotfi <plotfi at apple.com>
> Date: Fri Oct 27 16:26:10 2017 -0700
>
> mir-canon: First commit.
>
> mir-canon (MIRCanonicalizerPass) is a pass designed to reorder
> instructions and rename operands so that two similar programs will diff
> more cleanly after bring run through mir-canon than they could
> otherwise. This project is still a work in progress and there are ideas
> still being discussed for improving diff quality.
>
> diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h
> index 6b0e6acadad..18be1d250c8 100644
> --- a/include/llvm/InitializePasses.h
> +++ b/include/llvm/InitializePasses.h
> @@ -378,6 +378,7 @@ void initializeWinEHPreparePass(PassRegistry&);
> void initializeWriteBitcodePassPass(PassRegistry&);
> void initializeWriteThinLTOBitcodePass(PassRegistry&);
> void initializeXRayInstrumentationPass(PassRegistry&);
> +void initializeMIRCanonicalizerPass(PassRegistry &);
>
> } // end namespace llvm
>
> diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
> index 7ec7fda4e44..2e364cd4794 100644
> --- a/lib/CodeGen/CMakeLists.txt
> +++ b/lib/CodeGen/CMakeLists.txt
> @@ -113,6 +113,7 @@ add_llvm_library(LLVMCodeGen
> RegisterPressure.cpp
> RegisterScavenging.cpp
> RenameIndependentSubregs.cpp
> + MIRCanonicalizerPass.cpp
> RegisterUsageInfo.cpp
> RegUsageInfoCollector.cpp
> RegUsageInfoPropagate.cpp
> diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp
> index f4ccb4889d3..bfab865687e 100644
> --- a/lib/CodeGen/CodeGen.cpp
> +++ b/lib/CodeGen/CodeGen.cpp
> @@ -99,6 +99,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
> initializeVirtRegRewriterPass(Registry);
> initializeWinEHPreparePass(Registry);
> initializeXRayInstrumentationPass(Registry);
> + initializeMIRCanonicalizerPass(Registry);
> }
>
> void LLVMInitializeCodeGen(LLVMPassRegistryRef R) {
> diff --git a/lib/CodeGen/MIRCanonicalizerPass.cpp b/lib/CodeGen/MIRCanonicalizerPass.cpp
> new file mode 100644
> index 00000000000..b8c85e4ba37
> --- /dev/null
> +++ b/lib/CodeGen/MIRCanonicalizerPass.cpp
> @@ -0,0 +1,641 @@
> +//===-------------- MIRCanonicalizer.cpp - MIR Canonicalizer --------------===//
> +//
> +// The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +// The purpose of this pass is to establish a canonical operand naming so that
> +// code compiled with slightly different IR passes can be diffed more
> +// effectively than otherwise. This is done by renaming vregs in a given
> +// LiveRange in a canonical way. This pass also does a pseudo-scheduling to move
> +// defs closer to their use inorder to reduce diffs caused by slightly different
> +// schedules. At the moment code coming out of this pass is meant to be diffed
> +// manually with a diff tool like vimdiff or Beyond Compare and is not meant for
> +// executable code generation, however it is possible that this may change in
> +// the future.
This is better, though I'd probably drop the last sentence - it seems a
bit redundant with the point that we're reducing diffs here. It may be
worth pointing out the basic usage (ie. `llc -run-pass=mir-canonicalizer`)
> +//
> +// Reorders instructions canonically.
> +// Renames virtual register operands canonically.
> +// Strips certain MIR artifacts (optionally).
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#include "llvm/ADT/PostOrderIterator.h"
> +#include "llvm/ADT/STLExtras.h"
> +#include "llvm/CodeGen/Passes.h"
> +#include "llvm/CodeGen/MachineFunctionPass.h"
> +#include "llvm/CodeGen/MachineInstrBuilder.h"
> +#include "llvm/CodeGen/MachineRegisterInfo.h"
> +#include "llvm/Support/raw_ostream.h"
> +#include "llvm/Target/TargetInstrInfo.h"
> +
> +#include <queue>
> +
> +using namespace llvm;
> +
> +namespace llvm {
> +extern char &MIRCanonicalizerID;
> +} // namespace llvm
> +
> +#define DEBUG_TYPE "mir-canonicalizer"
> +
> +static cl::opt<unsigned>
> +CanonicalizeFunctionNumber("canon-nth-function", cl::Hidden, cl::init(~0u),
> + cl::value_desc("N"),
> + cl::desc("Function number to canonicalize."));
> +
> +static cl::opt<unsigned>
> +CanonicalizeBasicBlockNumber("canon-nth-basicblock", cl::Hidden, cl::init(~0u),
> + cl::value_desc("N"),
> + cl::desc("BasicBlock number to canonicalize."));
> +
> +namespace {
> +
> +class MIRCanonicalizer : public MachineFunctionPass {
> +public:
> + static char ID;
> + MIRCanonicalizer() : MachineFunctionPass(ID) {}
> +
> + StringRef getPassName() const override {
> + return "Rename register operands in a canonical ordering.";
> + }
> +
> + void getAnalysisUsage(AnalysisUsage &AU) const override {
> + AU.setPreservesCFG();
> + MachineFunctionPass::getAnalysisUsage(AU);
> + }
> +
> + bool runOnMachineFunction(MachineFunction &MF) override;
> +};
> +
> +} // end anonymous namespace
> +
> +enum VRType { RSE_Reg = 0, RSE_FrameIndex, RSE_NewCandidate };
> +class TypedVReg {
I'd actually meant something as simple as:
struct TypedVReg { VRType Type; unsigned Reg; }
but this class with helpers to make it harder to misuse makes sense.
> + VRType type;
> + unsigned reg;
> +
> + TypedVReg(): type(RSE_Reg), reg(~0U) { }
Why bother with a private default constructor? It'd be better to just
not have one at all, I think.
> +
> +public:
> + TypedVReg(unsigned _reg): type(RSE_Reg), reg(_reg) { }
LLVM style usually just aliases the parameters with the member variables
rather than doing the underscore thing, so just:
TypedVReg(unsigned Reg): type(RSE_Reg), Reg(Reg) {}
> + TypedVReg(VRType _type): type(_type), reg(~0U) { }
This should probably assert that Type != RSE_Reg
> + TypedVReg(const TypedVReg &TR): type(TR.type), reg(TR.reg) { }
> +
> + TypedVReg& operator=(const TypedVReg &rhs) {
> + type = rhs.type;
> + reg = rhs.reg;
> + return *this;
> + }
These should both be default, so just omit them (or write them as =
default if you're feeling verbose).
> +
> + VRType getType() const { return type; }
> + unsigned getReg() const {
> + assert(this->isReg() && "Expected a virtual or physical register.");
> + return reg;
> + }
> +
> + bool isReg() const { return type == RSE_Reg; }
> + bool isFrameIndex() const { return type == RSE_FrameIndex; }
> + bool isCandidate() const { return type == RSE_NewCandidate; }
> +};
> +
> +char MIRCanonicalizer::ID;
> +
> +char &llvm::MIRCanonicalizerID = MIRCanonicalizer::ID;
> +
> +INITIALIZE_PASS_BEGIN(MIRCanonicalizer, "mir-canonicalizer",
> + "Rename Register Operands Canonically", false, false);
> +
> +INITIALIZE_PASS_END(MIRCanonicalizer, "mir-canonicalizer",
> + "Rename Register Operands Canonically", false, false);
> +
> +static std::vector<MachineBasicBlock *> GetRPOList(MachineFunction &MF) {
> + ReversePostOrderTraversal<MachineBasicBlock*> RPOT(&*MF.begin());
> + std::vector<MachineBasicBlock*> RPOList;
> + for (auto MBB : RPOT) {
> + RPOList.push_back(MBB);
> + }
> +
> + return RPOList;
> +}
> +
> +// Set a dummy vreg. We use this vregs register class to generate throw-away
> +// vregs that are used to skip vreg numbers so that vreg numbers line up.
> +static unsigned GetDummyVReg(const MachineFunction &MF) {
> + for (auto &MBB : MF) {
> + for (auto &MI : MBB) {
> + for (auto &MO : MI.operands()) {
> + if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
> + continue;
> + return MO.getReg();
> + }
> + }
> + }
> +
> + return ~0U;
> +}
> +
> +static bool rescheduleCanoncally(MachineBasicBlock *MBB) {
> +
> + bool Changed = false;
> +
> + // Calculates the distance of MI from the begining of its parent BB.
> + auto getInstrIdx = [](const MachineInstr &MI) {
> + unsigned i = 0;
> + for (auto &CurMI : *MI.getParent()) {
> + if (&CurMI == &MI)
> + return i;
> + i++;
> + }
> + return ~0U;
> + };
> +
> + // Pre-Populate vector of instructions to reschedule so that we don't
> + // clobber the iterator.
> + std::vector<MachineInstr *> Instructions;
> + for (auto &MI : *MBB) {
> + Instructions.push_back(&MI);
> + }
> +
> + for (auto *II : Instructions) {
> + if (II->getNumOperands() == 0)
> + continue;
> +
> + MachineOperand &MO = II->getOperand(0);
> + if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
> + continue;
> +
> + DEBUG(dbgs() << "Operand " << 0 << " of "; II->dump(); MO.dump(););
> +
> + MachineInstr *Def = II;
> + unsigned Distance = ~0U;
> + MachineInstr *UseToBringDefCloserTo = nullptr;
> + MachineRegisterInfo *MRI = &MBB->getParent()->getRegInfo();
> + for (auto &UO : MRI->use_nodbg_operands(MO.getReg())) {
> + MachineInstr *UseInst = UO.getParent();
> +
> + const unsigned DefLoc = getInstrIdx(*Def);
> + const unsigned UseLoc = getInstrIdx(*UseInst);
> + const unsigned Delta = (UseLoc - DefLoc);
> +
> + if (UseInst->getParent() != Def->getParent())
> + continue;
> + if (DefLoc >= UseLoc)
> + continue;
> +
> + if (Delta < Distance) {
> + Distance = Delta;
> + UseToBringDefCloserTo = UseInst;
> + }
> + }
> +
> + const auto BBE = MBB->instr_end();
> + MachineBasicBlock::iterator DefI = BBE;
> + MachineBasicBlock::iterator UseI = BBE;
> +
> + for (auto BBI = MBB->instr_begin(); BBI != BBE; ++BBI) {
> +
> + if (DefI != BBE && UseI != BBE)
> + break;
> +
> + if ((&*BBI != Def) && (&*BBI != UseToBringDefCloserTo))
> + continue;
> +
> + if (&*BBI == Def) {
> + DefI = BBI;
> + continue;
> + }
> +
> + if (&*BBI == UseToBringDefCloserTo) {
> + UseI = BBI;
> + continue;
> + }
> + }
> +
> + if (DefI == BBE || UseI == BBE)
> + continue;
> +
> + DEBUG({
> + dbgs() << "Splicing ";
> + DefI->dump();
> + dbgs() << " right before: ";
> + UseI->dump();
> + });
> +
> + Changed = true;
> + MBB->splice(UseI, MBB, DefI);
> + }
> +
> + return Changed;
> +}
> +
> +/// Here we find our candidates. What makes an interesting candidate?
> +/// An candidate for a canonicalization tree root is normally any kind of
> +/// instruction that causes side effects such as a store to memory or a copy to
> +/// a physical register or a return instruction. We use these as an expression
> +/// tree root that we walk inorder to build a canonical walk which should result
> +/// in canoncal vreg renaming.
> +static std::vector<MachineInstr *> populateCandidates(MachineBasicBlock *MBB) {
> + std::vector<MachineInstr *> Candidates;
> + MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
> +
> + for (auto II = MBB->begin(), IE = MBB->end(); II != IE; ++II) {
> + MachineInstr *MI = &*II;
> +
> + bool DoesMISideEffect = false;
> +
> + if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg()) {
> + const unsigned Dst = MI->getOperand(0).getReg();
> + DoesMISideEffect |= !TargetRegisterInfo::isVirtualRegister(Dst);
> +
> + for (auto UI = MRI.use_begin(Dst); UI != MRI.use_end(); ++UI) {
> + if (DoesMISideEffect) break;
> + DoesMISideEffect |= (UI->getParent()->getParent() != MI->getParent());
> + }
> + }
> +
> + if (!MI->mayStore() && !MI->isBranch() && !DoesMISideEffect)
> + continue;
> +
> + DEBUG(dbgs() << "Found Candidate: "; MI->dump(););
> + Candidates.push_back(MI);
> + }
> +
> + return Candidates;
> +}
> +
> +void doCandidateWalk(std::vector<TypedVReg> &VRegs,
> + std::queue <TypedVReg> ®_queue,
> + std::vector<MachineInstr *> &visitedMIs,
Looks like you missed reg_queue and visitedMIs when you were renaming
variables in the llvm style.
> + const MachineBasicBlock *MBB) {
> +
> + const MachineFunction &MF = *MBB->getParent();
> + const MachineRegisterInfo &MRI = MF.getRegInfo();
> +
> + while (reg_queue.size()) {
> +
> + auto TReg = reg_queue.front();
> + reg_queue.pop();
> +
> + if (TReg.isFrameIndex()) {
> + DEBUG(dbgs() << "Popping frame index.\n";);
> + VRegs.push_back(TypedVReg(RSE_FrameIndex));
> + continue;
> + }
> +
> + assert(TReg.isReg() && "Expected vreg or physreg.");
> + auto Reg = TReg.getReg();
It's clearer to write out "unsigned" than to use auto here.
> +
> + if (TargetRegisterInfo::isVirtualRegister(Reg)) {
> + DEBUG({
> + dbgs() << "Popping vreg ";
> + MRI.def_begin(Reg)->dump();
> + dbgs() << "\n";
> + });
> +
> + auto VisitedVRegAt =
> + std::find_if(VRegs.begin(), VRegs.end(),
> + [&](const TypedVReg &TR) {
> + return TR.isReg() && TR.getReg() == Reg;
> + });
Oops, I'd said find_if, but I really meant any_of. This can be even
simpler like so:
bool VisitedVReg = llvm::any_of(VRegs, [&](const TypedVReg &TR) {
return TR.isReg() && TR.getReg() == Reg;
});
> +
> + if (VRegs.end() == VisitedVRegAt)
> + VRegs.push_back(TypedVReg(Reg));
> + } else {
> + DEBUG(dbgs() << "Popping physreg.\n";);
> + VRegs.push_back(TypedVReg(Reg));
> + continue;
> + }
> +
> + for (auto RI = MRI.def_begin(Reg), RE = MRI.def_end(); RI != RE; ++RI) {
> + MachineInstr *Def = RI->getParent();
> +
> + if (Def->getParent() != MBB)
> + continue;
> +
> + auto VisitedMIAt =
> + std::find_if(visitedMIs.begin(), visitedMIs.end(),
> + [&](const MachineInstr* VMI) { return Def == VMI; });
> +
> + if (visitedMIs.end() != VisitedMIAt)
> + break;
> +
> + DEBUG({
> + dbgs() << "\n========================\n";
> + dbgs() << "Visited MI: ";
> + Def->dump();
> + dbgs() << "BB Name: " << Def->getParent()->getName() << "\n";
> + dbgs() << "\n========================\n";
> + });
> + visitedMIs.push_back(Def);
> + for (unsigned I = 1, E = Def->getNumOperands(); I != E; ++I) {
> +
> + MachineOperand &MO = Def->getOperand(I);
> + if (MO.isFI()) {
> + DEBUG(dbgs() << "Pushing frame index.\n";);
> + reg_queue.push(TypedVReg(RSE_FrameIndex));
> + }
> +
> + if (!MO.isReg())
> + continue;
> + reg_queue.push(TypedVReg(MO.getReg()));
> + }
> + }
> + }
> +}
> +
> +/// TODO: Work to remove this in the future. One day when we have named vregs
> +/// we should be able to form the canonical name based on some characteristic
> +/// we see in that point of the expression tree (like if we were to name based
> +/// on some sort of value numbering scheme).
> +static void SkipVRegs(unsigned &VRegGapIndex, MachineRegisterInfo &MRI,
> + const TargetRegisterClass *RC) {
> + const unsigned VR_GAP = (++VRegGapIndex * 1000);
> +
> + DEBUG({
> + dbgs() << "Adjusting per-BB VR_GAP for BB" << VRegGapIndex << " to "
> + << VR_GAP << "\n";
> + });
> +
> + unsigned I = MRI.createVirtualRegister(RC);
> + const unsigned E = (((I + VR_GAP) / VR_GAP) + 1) * VR_GAP;
> + while (I != E) {
> + I = MRI.createVirtualRegister(RC);
> + }
> +}
> +
> +static void GetVRegRenameMap(std::map<unsigned, unsigned> &vregRenameMap,
> + const std::vector<TypedVReg> &VRegs,
> + const std::vector<unsigned> &renamedInOtherBB,
> + MachineRegisterInfo &MRI,
> + const TargetRegisterClass *RC) {
> +
> + unsigned LastRenameReg = MRI.createVirtualRegister(RC);
> + bool FirstCandidate = true;
> +
> + for (auto &vreg : VRegs) {
> + if (vreg.isFrameIndex()) {
> + // We skip one vreg for any frame index because there is a good chance
> + // (especially when comparing SelectionDAG to GlobalISel generated MIR)
> + // that in the other file we are just getting an incoming vreg that comes
> + // from a copy from a frame index. So it's safe to skip by one.
> + LastRenameReg = MRI.createVirtualRegister(RC);
> + DEBUG(dbgs() << "Skipping rename for FI " << LastRenameReg << "\n";);
> + continue;
> + } else if (vreg.isCandidate()) {
> +
> + // After the first candidate, for every subsequent candidate, we skip mod
> + // 10 registers so that the candidates are more likely to start at the
> + // same vreg number making it more likely that the canonical walk from the
> + // candidate insruction. We don't need to skip from the first candidate of
> + // the BasicBlock because we already skip ahead several vregs for each BB.
> + while (LastRenameReg % 10) {
> + if (!FirstCandidate) break;
> + LastRenameReg = MRI.createVirtualRegister(RC);
> +
> + DEBUG({
> + dbgs() << "Skipping rename for new candidate " << LastRenameReg
> + << "\n";
> + });
> + }
> + FirstCandidate = false;
> + continue;
> + } else if (!TargetRegisterInfo::isVirtualRegister(vreg.getReg())) {
> + LastRenameReg = MRI.createVirtualRegister(RC);
> + DEBUG({
> + dbgs() << "Skipping rename for Phys Reg " << LastRenameReg << "\n";
> + });
> + continue;
> + }
> +
> + auto Reg = vreg.getReg();
> + if (llvm::find(renamedInOtherBB, Reg) != renamedInOtherBB.end()) {
> + DEBUG(dbgs() << "Vreg " << Reg << " already renamed in other BB.\n";);
> + continue;
> + }
> +
> + auto Rename = MRI.createVirtualRegister(MRI.getRegClass(Reg));
> + LastRenameReg = Rename;
> +
> + if (vregRenameMap.find(Reg) == vregRenameMap.end()) {
> + DEBUG(dbgs() << "Mapping vreg ";);
> + if (MRI.reg_begin(Reg) != MRI.reg_end()) {
> + DEBUG(auto foo = &*MRI.reg_begin(Reg); foo->dump(););
> + } else {
> + DEBUG(dbgs() << Reg;);
> + }
> + DEBUG(dbgs() << " to ";);
> + if (MRI.reg_begin(Rename) != MRI.reg_end()) {
> + DEBUG(auto foo = &*MRI.reg_begin(Rename); foo->dump(););
> + } else {
> + DEBUG(dbgs() << Rename;);
> + }
> + DEBUG(dbgs() << "\n";);
> +
> + vregRenameMap.insert(std::pair<unsigned, unsigned>(Reg, Rename));
> + }
> + }
> +}
> +
> +static bool doVRegRenaming(std::vector<unsigned> &renamedInOtherBB,
> + const std::map<unsigned, unsigned> &vregRenameMap,
> + MachineRegisterInfo &MRI) {
> + bool Changed = false;
> + for (auto I = vregRenameMap.begin(), E = vregRenameMap.end(); I != E; ++I) {
> +
> + auto VReg = I->first;
> + auto Rename = I->second;
> +
> + renamedInOtherBB.push_back(Rename);
> +
> + std::vector<MachineOperand *> RenameMOs;
> + for (auto &MO : MRI.reg_operands(VReg)) {
> + RenameMOs.push_back(&MO);
> + }
> +
> + for (auto *MO : RenameMOs) {
> + Changed = true;
> + MO->setReg(Rename);
> +
> + if (!MO->isDef())
> + MO->setIsKill(false);
> + }
> + }
> +
> + return Changed;
> +}
> +
> +static bool doDefKillClear(MachineBasicBlock *MBB) {
> + bool Changed = false;
> +
> + for (auto &MI : *MBB) {
> + for (auto &MO : MI.operands()) {
> + if (!MO.isReg())
> + continue;
> + if (!MO.isDef() && MO.isKill()) {
> + Changed = true;
> + MO.setIsKill(false);
> + }
> +
> + if (MO.isDef() && MO.isDead()) {
> + Changed = true;
> + MO.setIsDead(false);
> + }
> + }
> + }
> +
> + return Changed;
> +}
> +
> +static bool runOnBasicBlock(MachineBasicBlock *MBB,
> + std::vector<StringRef> &bbNames,
> + std::vector<unsigned> &renamedInOtherBB,
> + unsigned &basicBlockNum, unsigned &VRegGapIndex) {
> +
> + if (CanonicalizeBasicBlockNumber != ~0U) {
> + if (CanonicalizeBasicBlockNumber != basicBlockNum++)
> + return false;
> + DEBUG(dbgs() << "\n Canonicalizing BasicBlock " << MBB->getName() << "\n";);
> + }
> +
> + if (llvm::find(bbNames, MBB->getName()) != bbNames.end()) {
> + DEBUG({
> + dbgs() << "Found potentially duplicate BasicBlocks: " << MBB->getName()
> + << "\n";
> + });
> + return false;
> + }
> +
> + DEBUG({
> + dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << " \n\n";
> + dbgs() << "\n\n================================================\n\n";
> + });
> +
> + bool Changed = false;
> + MachineFunction &MF = *MBB->getParent();
> + MachineRegisterInfo &MRI = MF.getRegInfo();
> +
> + const TargetRegisterClass *DummyRC = [] (const MachineFunction &MF) {
> + const unsigned DummyVReg = GetDummyVReg(MF);
> + assert(DummyVReg != ~0U && "Could not find a dummy VReg.");
So this pass doesn't work at all if we give it input with no vregs. That
doesn't seem great - maybe we'd be better off making something up in
that case.
> + const MachineRegisterInfo &MRI = MF.getRegInfo();
> + return MRI.getRegClass(DummyVReg);
> + } (MF);
> +
> + bbNames.push_back(MBB->getName());
> + DEBUG(dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << "\n\n";);
> +
> + DEBUG(dbgs() << "MBB Before Scheduling:\n"; MBB->dump(););
> + Changed |= rescheduleCanoncally(MBB);
> + DEBUG(dbgs() << "MBB After Scheduling:\n"; MBB->dump(););
> +
> + std::vector<MachineInstr *> Candidates = populateCandidates(MBB);
> + std::vector<MachineInstr *> VisitedMIs;
> + std::copy(Candidates.begin(), Candidates.end(),
> + std::back_inserter(VisitedMIs));
> +
> + std::vector<TypedVReg> VRegs;
> + for (auto candidate : Candidates) {
> + VRegs.push_back(TypedVReg(RSE_NewCandidate));
> +
> + std::queue<TypedVReg> reg_queue;
> +
> + // Here we walk the vreg operands of a non-root node along our walk.
> + // The root nodes are the original candidates (stores normally).
> + // These are normally not the root nodes (except for the case of copies to
> + // physical registers).
> + for (unsigned i = 1; i < candidate->getNumOperands(); i++) {
> + if (candidate->mayStore() || candidate->isBranch())
> + break;
> +
> + MachineOperand &MO = candidate->getOperand(i);
> + if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())))
> + continue;
> +
> + DEBUG(dbgs() << "Enqueue register"; MO.dump(); dbgs() << "\n";);
> + reg_queue.push(TypedVReg(MO.getReg()));
> + }
> +
> + // Here we walk the root candidates. We start from the 0th operand because
> + // the root is normally a store to a vreg.
> + for (unsigned i = 0; i < candidate->getNumOperands(); i++) {
> +
> + if (!candidate->mayStore() && !candidate->isBranch())
> + break;
> +
> + MachineOperand &MO = candidate->getOperand(i);
> +
> + // TODO: Do we want to only add vregs here?
> + if (!MO.isReg() && !MO.isFI())
> + continue;
> +
> + DEBUG(dbgs() << "Enqueue Reg/FI"; MO.dump(); dbgs() << "\n";);
> +
> + reg_queue.push(MO.isReg() ? TypedVReg(MO.getReg()) :
> + TypedVReg(RSE_FrameIndex));
> + }
> +
> + // Now that we have pushed the operands of the candidate here, we do the
> + // full breadth first walk.
> + doCandidateWalk(VRegs, reg_queue, VisitedMIs, MBB);
> + }
> +
> + // If we have populated no vregs to rename then bail.
> + // The rest of this function does the vreg remaping.
> + if (VRegs.size() == 0)
> + return Changed;
> +
> + // Skip some vregs, so we can recon where we'll land next.
> + SkipVRegs(VRegGapIndex, MRI, DummyRC);
> +
> + std::map<unsigned, unsigned> VRegRenameMap;
> + GetVRegRenameMap(VRegRenameMap, VRegs, renamedInOtherBB, MRI, DummyRC);
> +
> + Changed |= doVRegRenaming(renamedInOtherBB, VRegRenameMap, MRI);
> + Changed |= doDefKillClear(MBB);
> +
> + DEBUG(dbgs() << "Updated MachineBasicBlock:\n"; MBB->dump(); dbgs() << "\n";);
> + DEBUG(dbgs() << "\n\n================================================\n\n");
> + return Changed;
> +}
> +
> +bool MIRCanonicalizer::runOnMachineFunction(MachineFunction &MF) {
> +
> + static unsigned functionNum = 0;
> + if (CanonicalizeFunctionNumber != ~0U) {
> + if (CanonicalizeFunctionNumber != functionNum++)
> + return false;
> + DEBUG(dbgs() << "\n Canonicalizing Function " << MF.getName() << "\n";);
> + }
> +
> + // we need a valid vreg to create a vreg type for skipping all those
> + // stray vreg numbers so reach alignment/canonical vreg values.
> + std::vector<MachineBasicBlock*> RPOList = GetRPOList(MF);
> +
> + DEBUG(
> + dbgs() << "\n\n NEW MACHINE FUNCTION: " << MF.getName() << " \n\n";
> + dbgs() << "\n\n================================================\n\n";
> + dbgs() << "Total Basic Blocks: " << RPOList.size() << "\n";
> + for (auto MBB : RPOList) {
> + dbgs() << MBB->getName() << "\n";
> + }
> + dbgs() << "\n\n================================================\n\n";
> + );
> +
> + std::vector<StringRef> BBNames;
> + std::vector<unsigned> RenamedInOtherBB;
> +
> + unsigned GapIdx = 0;
> + unsigned BBNum = 0;
> +
> + bool Changed = false;
> +
> + for (auto MBB : RPOList)
> + Changed |= runOnBasicBlock(MBB, BBNames, RenamedInOtherBB, BBNum, GapIdx);
> +
> + return Changed;
> +}
> +
More information about the llvm-commits
mailing list