[llvm-branch-commits] [GlobalISel] Combiner: Observer-based DCE and retrying of combines (PR #102163)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue Aug 6 08:31:35 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-globalisel
Author: Tobias Stadler (tobias-stadler)
<details>
<summary>Changes</summary>
Continues the work for disabling fixed-point iteration in the Combiner
(#<!-- -->94291).
This introduces improved Observer-based heuristics in the
GISel Combiner to retry combining defs/uses of modified instructions and
for performing sparse dead code elimination.
I have experimented a lot with the heuristics and this seems to be the
minimal set of heuristics that allows disabling fixed-point iteration
for AArch64 CTMark O2 without regressions.
Enabling this globally would pass all regression tests for all official
targets (apart from small benign diffs), but I have made this fully
opt-in for now, because I can't quantify the impact for other targets.
For performance numbers see my follow-up patch for AArch64.
---
Full diff: https://github.com/llvm/llvm-project/pull/102163.diff
3 Files Affected:
- (modified) llvm/include/llvm/CodeGen/GlobalISel/Combiner.h (+8-2)
- (modified) llvm/include/llvm/CodeGen/GlobalISel/CombinerInfo.h (+25)
- (modified) llvm/lib/CodeGen/GlobalISel/Combiner.cpp (+180-38)
``````````diff
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Combiner.h b/llvm/include/llvm/CodeGen/GlobalISel/Combiner.h
index f826601544932..fa6a7be6cf6c3 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/Combiner.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/Combiner.h
@@ -15,13 +15,13 @@
#ifndef LLVM_CODEGEN_GLOBALISEL_COMBINER_H
#define LLVM_CODEGEN_GLOBALISEL_COMBINER_H
+#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
namespace llvm {
class MachineRegisterInfo;
-struct CombinerInfo;
class GISelCSEInfo;
class TargetPassConfig;
class MachineFunction;
@@ -33,8 +33,12 @@ class MachineIRBuilder;
/// TODO: Is it worth making this module-wide?
class Combiner : public GIMatchTableExecutor {
private:
+ using WorkListTy = GISelWorkList<512>;
+
class WorkListMaintainer;
- GISelWorkList<512> WorkList;
+ template <CombinerInfo::ObserverLevel Lvl> class WorkListMaintainerImpl;
+
+ WorkListTy WorkList;
// We have a little hack here where keep the owned pointers private, and only
// expose a reference. This has two purposes:
@@ -48,6 +52,8 @@ class Combiner : public GIMatchTableExecutor {
bool HasSetupMF = false;
+ static bool tryDCE(MachineInstr &MI, MachineRegisterInfo &MRI);
+
public:
/// If CSEInfo is not null, then the Combiner will use CSEInfo as the observer
/// and also create a CSEMIRBuilder. Pass nullptr if CSE is not needed.
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerInfo.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerInfo.h
index 2b0eb71f88082..67f95c962c582 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerInfo.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerInfo.h
@@ -53,6 +53,31 @@ struct CombinerInfo {
/// The maximum number of times the Combiner will iterate over the
/// MachineFunction. Setting this to 0 enables fixed-point iteration.
unsigned MaxIterations = 0;
+
+ enum class ObserverLevel {
+ /// Only retry combining created/changed instructions.
+ /// This replicates the legacy default Observer behavior for use with
+ /// fixed-point iteration.
+ Basic,
+ /// Enables Observer-based detection of dead instructions. This can save
+ /// some compile-time if full disabling of fixed-point iteration is not
+ /// desired. If the input IR doesn't contain dead instructions, consider
+ /// disabling \p EnableFullDCE.
+ DCE,
+ /// Enables Observer-based DCE and additional heuristics that retry
+ /// combining defined and used instructions of modified instructions.
+ /// This provides a good balance between compile-time and completeness of
+ /// combining without needing fixed-point iteration.
+ SinglePass,
+ };
+
+ /// Select how the Combiner acts on MIR changes.
+ ObserverLevel ObserverLvl = ObserverLevel::Basic;
+
+ /// Whether dead code elimination is performed before each Combiner iteration.
+ /// If Observer-based DCE is enabled, this controls if a full DCE pass is
+ /// performed before the first Combiner iteration.
+ bool EnableFullDCE = true;
};
} // namespace llvm
diff --git a/llvm/lib/CodeGen/GlobalISel/Combiner.cpp b/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
index 49842e5fd65da..c5ec73cd5c65d 100644
--- a/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
@@ -45,61 +45,189 @@ cl::OptionCategory GICombinerOptionCategory(
);
} // end namespace llvm
-/// This class acts as the glue the joins the CombinerHelper to the overall
+/// This class acts as the glue that joins the CombinerHelper to the overall
/// Combine algorithm. The CombinerHelper is intended to report the
/// modifications it makes to the MIR to the GISelChangeObserver and the
-/// observer subclass will act on these events. In this case, instruction
-/// erasure will cancel any future visits to the erased instruction and
-/// instruction creation will schedule that instruction for a future visit.
-/// Other Combiner implementations may require more complex behaviour from
-/// their GISelChangeObserver subclass.
+/// observer subclass will act on these events.
class Combiner::WorkListMaintainer : public GISelChangeObserver {
- using WorkListTy = GISelWorkList<512>;
- WorkListTy &WorkList;
+protected:
+#ifndef NDEBUG
/// The instructions that have been created but we want to report once they
/// have their operands. This is only maintained if debug output is requested.
-#ifndef NDEBUG
- SetVector<const MachineInstr *> CreatedInstrs;
+ SmallSetVector<const MachineInstr *, 32> CreatedInstrs;
#endif
+ using Level = CombinerInfo::ObserverLevel;
public:
- WorkListMaintainer(WorkListTy &WorkList) : WorkList(WorkList) {}
+ static std::unique_ptr<WorkListMaintainer>
+ create(Level Lvl, WorkListTy &WorkList, MachineRegisterInfo &MRI);
+
virtual ~WorkListMaintainer() = default;
+ void reportFullyCreatedInstrs() {
+ LLVM_DEBUG({
+ for (auto *MI : CreatedInstrs) {
+ dbgs() << "Created: " << *MI;
+ }
+ CreatedInstrs.clear();
+ });
+ }
+
+ virtual void reset() = 0;
+ virtual void appliedCombine() = 0;
+};
+
+/// A configurable WorkListMaintainer implementation.
+/// The ObserverLevel determines how the WorkListMaintainer reacts to MIR
+/// changes.
+template <CombinerInfo::ObserverLevel Lvl>
+class Combiner::WorkListMaintainerImpl : public Combiner::WorkListMaintainer {
+ WorkListTy &WorkList;
+ MachineRegisterInfo &MRI;
+
+ // Defer handling these instructions until the combine finishes.
+ SmallSetVector<MachineInstr *, 32> DeferList;
+
+ // Track VRegs that (might) have lost a use.
+ SmallSetVector<Register, 32> LostUses;
+
+public:
+ WorkListMaintainerImpl(WorkListTy &WorkList, MachineRegisterInfo &MRI)
+ : WorkList(WorkList), MRI(MRI) {}
+
+ virtual ~WorkListMaintainerImpl() = default;
+
+ void reset() override {
+ DeferList.clear();
+ LostUses.clear();
+ }
+
void erasingInstr(MachineInstr &MI) override {
- LLVM_DEBUG(dbgs() << "Erasing: " << MI << "\n");
+ // MI will become dangling, remove it from all lists.
+ LLVM_DEBUG(dbgs() << "Erasing: " << MI; CreatedInstrs.remove(&MI));
WorkList.remove(&MI);
+ if constexpr (Lvl != Level::Basic) {
+ DeferList.remove(&MI);
+ noteLostUses(MI);
+ }
}
+
void createdInstr(MachineInstr &MI) override {
- LLVM_DEBUG(dbgs() << "Creating: " << MI << "\n");
- WorkList.insert(&MI);
- LLVM_DEBUG(CreatedInstrs.insert(&MI));
+ LLVM_DEBUG(dbgs() << "Creating: " << MI; CreatedInstrs.insert(&MI));
+ if constexpr (Lvl == Level::Basic)
+ WorkList.insert(&MI);
+ else
+ // Defer handling newly created instructions, because they don't have
+ // operands yet. We also insert them into the WorkList in reverse
+ // order so that they will be combined top down.
+ DeferList.insert(&MI);
}
+
void changingInstr(MachineInstr &MI) override {
- LLVM_DEBUG(dbgs() << "Changing: " << MI << "\n");
- WorkList.insert(&MI);
+ LLVM_DEBUG(dbgs() << "Changing: " << MI);
+ // Some uses might get dropped when MI is changed.
+ // For now, overapproximate by assuming all uses will be dropped.
+ // TODO: Is a more precise heuristic or manual tracking of use count
+ // decrements worth it?
+ if constexpr (Lvl != Level::Basic)
+ noteLostUses(MI);
}
+
void changedInstr(MachineInstr &MI) override {
- LLVM_DEBUG(dbgs() << "Changed: " << MI << "\n");
- WorkList.insert(&MI);
+ LLVM_DEBUG(dbgs() << "Changed: " << MI);
+ if constexpr (Lvl == Level::Basic)
+ WorkList.insert(&MI);
+ else
+ // Defer this for DCE
+ DeferList.insert(&MI);
}
- void reportFullyCreatedInstrs() {
- LLVM_DEBUG(for (const auto *MI
- : CreatedInstrs) {
- dbgs() << "Created: ";
- MI->print(dbgs());
- });
- LLVM_DEBUG(CreatedInstrs.clear());
+ // Only track changes during the combine and then walk the def/use-chains once
+ // the combine is finished, because:
+ // - instructions might have multiple defs during the combine.
+ // - use counts aren't accurate during the combine.
+ void appliedCombine() override {
+ if constexpr (Lvl == Level::Basic)
+ return;
+
+ // DCE deferred instructions and add them to the WorkList bottom up.
+ while (!DeferList.empty()) {
+ MachineInstr &MI = *DeferList.pop_back_val();
+ if (tryDCE(MI, MRI))
+ continue;
+
+ if constexpr (Lvl >= Level::SinglePass)
+ addUsersToWorkList(MI);
+
+ WorkList.insert(&MI);
+ }
+
+ // Handle instructions that have lost a user.
+ while (!LostUses.empty()) {
+ Register Use = LostUses.pop_back_val();
+ MachineInstr *UseMI = MRI.getVRegDef(Use);
+ if (!UseMI)
+ continue;
+
+ // If DCE succeeds, UseMI's uses are added back to LostUses by
+ // erasingInstr.
+ if (tryDCE(*UseMI, MRI))
+ continue;
+
+ if constexpr (Lvl >= Level::SinglePass) {
+ // OneUse checks are relatively common, so we might be able to combine
+ // the single remaining user of this Reg.
+ if (MRI.hasOneNonDBGUser(Use))
+ WorkList.insert(&*MRI.use_instr_nodbg_begin(Use));
+
+ WorkList.insert(UseMI);
+ }
+ }
+ }
+
+ void noteLostUses(MachineInstr &MI) {
+ for (auto &Use : MI.explicit_uses()) {
+ if (!Use.isReg() || !Use.getReg().isVirtual())
+ continue;
+ LostUses.insert(Use.getReg());
+ }
+ }
+
+ void addUsersToWorkList(MachineInstr &MI) {
+ for (auto &Def : MI.defs()) {
+ Register DefReg = Def.getReg();
+ if (!DefReg.isVirtual())
+ continue;
+ for (auto &UseMI : MRI.use_nodbg_instructions(DefReg)) {
+ WorkList.insert(&UseMI);
+ }
+ }
}
};
+std::unique_ptr<Combiner::WorkListMaintainer>
+Combiner::WorkListMaintainer::create(Level Lvl, WorkListTy &WorkList,
+ MachineRegisterInfo &MRI) {
+ switch (Lvl) {
+ case Level::Basic:
+ return std::make_unique<WorkListMaintainerImpl<Level::Basic>>(WorkList,
+ MRI);
+ case Level::DCE:
+ return std::make_unique<WorkListMaintainerImpl<Level::DCE>>(WorkList, MRI);
+ case Level::SinglePass:
+ return std::make_unique<WorkListMaintainerImpl<Level::SinglePass>>(WorkList,
+ MRI);
+ }
+ llvm_unreachable("Illegal ObserverLevel");
+}
+
Combiner::Combiner(MachineFunction &MF, CombinerInfo &CInfo,
const TargetPassConfig *TPC, GISelKnownBits *KB,
GISelCSEInfo *CSEInfo)
: Builder(CSEInfo ? std::make_unique<CSEMIRBuilder>()
: std::make_unique<MachineIRBuilder>()),
- WLObserver(std::make_unique<WorkListMaintainer>(WorkList)),
+ WLObserver(WorkListMaintainer::create(CInfo.ObserverLvl, WorkList,
+ MF.getRegInfo())),
ObserverWrapper(std::make_unique<GISelObserverWrapper>()), CInfo(CInfo),
Observer(*ObserverWrapper), B(*Builder), MF(MF), MRI(MF.getRegInfo()),
KB(KB), TPC(TPC), CSEInfo(CSEInfo) {
@@ -115,6 +243,15 @@ Combiner::Combiner(MachineFunction &MF, CombinerInfo &CInfo,
Combiner::~Combiner() = default;
+bool Combiner::tryDCE(MachineInstr &MI, MachineRegisterInfo &MRI) {
+ if (!isTriviallyDead(MI, MRI))
+ return false;
+ LLVM_DEBUG(dbgs() << "Dead: " << MI);
+ llvm::salvageDebugInfo(MRI, MI);
+ MI.eraseFromParent();
+ return true;
+}
+
bool Combiner::combineMachineInstrs() {
// If the ISel pipeline failed, do not bother running this pass.
// FIXME: Should this be here or in individual combiner passes.
@@ -141,27 +278,29 @@ bool Combiner::combineMachineInstrs() {
++Iteration;
LLVM_DEBUG(dbgs() << "\n\nCombiner iteration #" << Iteration << '\n');
+ Changed = false;
WorkList.clear();
+ WLObserver->reset();
ObserverWrapper->clearObservers();
if (CSEInfo)
ObserverWrapper->addObserver(CSEInfo);
+ // If Observer-based DCE is enabled, perform full DCE only before the first
+ // iteration.
+ bool EnableDCE = CInfo.ObserverLvl >= CombinerInfo::ObserverLevel::DCE
+ ? CInfo.EnableFullDCE && Iteration == 1
+ : CInfo.EnableFullDCE;
+
// Collect all instructions. Do a post order traversal for basic blocks and
// insert with list bottom up, so while we pop_back_val, we'll traverse top
// down RPOT.
- Changed = false;
-
RAIIMFObsDelInstaller DelInstall(MF, *ObserverWrapper);
for (MachineBasicBlock *MBB : post_order(&MF)) {
for (MachineInstr &CurMI :
llvm::make_early_inc_range(llvm::reverse(*MBB))) {
// Erase dead insts before even adding to the list.
- if (isTriviallyDead(CurMI, MRI)) {
- LLVM_DEBUG(dbgs() << CurMI << "Is dead; erasing.\n");
- llvm::salvageDebugInfo(MRI, CurMI);
- CurMI.eraseFromParent();
+ if (EnableDCE && tryDCE(CurMI, MRI))
continue;
- }
WorkList.deferred_insert(&CurMI);
}
}
@@ -171,10 +310,13 @@ bool Combiner::combineMachineInstrs() {
ObserverWrapper->addObserver(WLObserver.get());
// Main Loop. Process the instructions here.
while (!WorkList.empty()) {
- MachineInstr *CurrInst = WorkList.pop_back_val();
- LLVM_DEBUG(dbgs() << "\nTry combining " << *CurrInst;);
- Changed |= tryCombineAll(*CurrInst);
- WLObserver->reportFullyCreatedInstrs();
+ MachineInstr &CurrInst = *WorkList.pop_back_val();
+ LLVM_DEBUG(dbgs() << "\nTry combining " << CurrInst);
+ bool AppliedCombine = tryCombineAll(CurrInst);
+ LLVM_DEBUG(WLObserver->reportFullyCreatedInstrs());
+ Changed |= AppliedCombine;
+ if (AppliedCombine)
+ WLObserver->appliedCombine();
}
MFChanged |= Changed;
``````````
</details>
https://github.com/llvm/llvm-project/pull/102163
More information about the llvm-branch-commits
mailing list