[llvm-branch-commits] [GlobalISel] Combiner: Observer-based DCE and retrying of combines (PR #102163)

via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Tue Aug 6 08:31:35 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-globalisel

Author: Tobias Stadler (tobias-stadler)

<details>
<summary>Changes</summary>

Continues the work for disabling fixed-point iteration in the Combiner
(#<!-- -->94291).

This introduces improved Observer-based heuristics in the
GISel Combiner to retry combining defs/uses of modified instructions and
for performing sparse dead code elimination.

I have experimented a lot with the heuristics and this seems to be the
minimal set of heuristics that allows disabling fixed-point iteration
for AArch64 CTMark O2 without regressions.
Enabling this globally would pass all regression tests for all official
targets (apart from small benign diffs), but I have made this fully
opt-in for now, because I can't quantify the impact for other targets.

For performance numbers see my follow-up patch for AArch64.


---
Full diff: https://github.com/llvm/llvm-project/pull/102163.diff


3 Files Affected:

- (modified) llvm/include/llvm/CodeGen/GlobalISel/Combiner.h (+8-2) 
- (modified) llvm/include/llvm/CodeGen/GlobalISel/CombinerInfo.h (+25) 
- (modified) llvm/lib/CodeGen/GlobalISel/Combiner.cpp (+180-38) 


``````````diff
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Combiner.h b/llvm/include/llvm/CodeGen/GlobalISel/Combiner.h
index f826601544932..fa6a7be6cf6c3 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/Combiner.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/Combiner.h
@@ -15,13 +15,13 @@
 #ifndef LLVM_CODEGEN_GLOBALISEL_COMBINER_H
 #define LLVM_CODEGEN_GLOBALISEL_COMBINER_H
 
+#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
 #include "llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h"
 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
 
 namespace llvm {
 class MachineRegisterInfo;
-struct CombinerInfo;
 class GISelCSEInfo;
 class TargetPassConfig;
 class MachineFunction;
@@ -33,8 +33,12 @@ class MachineIRBuilder;
 /// TODO: Is it worth making this module-wide?
 class Combiner : public GIMatchTableExecutor {
 private:
+  using WorkListTy = GISelWorkList<512>;
+
   class WorkListMaintainer;
-  GISelWorkList<512> WorkList;
+  template <CombinerInfo::ObserverLevel Lvl> class WorkListMaintainerImpl;
+
+  WorkListTy WorkList;
 
   // We have a little hack here where keep the owned pointers private, and only
   // expose a reference. This has two purposes:
@@ -48,6 +52,8 @@ class Combiner : public GIMatchTableExecutor {
 
   bool HasSetupMF = false;
 
+  static bool tryDCE(MachineInstr &MI, MachineRegisterInfo &MRI);
+
 public:
   /// If CSEInfo is not null, then the Combiner will use CSEInfo as the observer
   /// and also create a CSEMIRBuilder. Pass nullptr if CSE is not needed.
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerInfo.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerInfo.h
index 2b0eb71f88082..67f95c962c582 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerInfo.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerInfo.h
@@ -53,6 +53,31 @@ struct CombinerInfo {
   /// The maximum number of times the Combiner will iterate over the
   /// MachineFunction. Setting this to 0 enables fixed-point iteration.
   unsigned MaxIterations = 0;
+
+  enum class ObserverLevel {
+    /// Only retry combining created/changed instructions.
+    /// This replicates the legacy default Observer behavior for use with
+    /// fixed-point iteration.
+    Basic,
+    /// Enables Observer-based detection of dead instructions. This can save
+    /// some compile-time if full disabling of fixed-point iteration is not
+    /// desired. If the input IR doesn't contain dead instructions, consider
+    /// disabling \p EnableFullDCE.
+    DCE,
+    /// Enables Observer-based DCE and additional heuristics that retry
+    /// combining defined and used instructions of modified instructions.
+    /// This provides a good balance between compile-time and completeness of
+    /// combining without needing fixed-point iteration.
+    SinglePass,
+  };
+
+  /// Select how the Combiner acts on MIR changes.
+  ObserverLevel ObserverLvl = ObserverLevel::Basic;
+
+  /// Whether dead code elimination is performed before each Combiner iteration.
+  /// If Observer-based DCE is enabled, this controls if a full DCE pass is
+  /// performed before the first Combiner iteration.
+  bool EnableFullDCE = true;
 };
 } // namespace llvm
 
diff --git a/llvm/lib/CodeGen/GlobalISel/Combiner.cpp b/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
index 49842e5fd65da..c5ec73cd5c65d 100644
--- a/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
@@ -45,61 +45,189 @@ cl::OptionCategory GICombinerOptionCategory(
 );
 } // end namespace llvm
 
-/// This class acts as the glue the joins the CombinerHelper to the overall
+/// This class acts as the glue that joins the CombinerHelper to the overall
 /// Combine algorithm. The CombinerHelper is intended to report the
 /// modifications it makes to the MIR to the GISelChangeObserver and the
-/// observer subclass will act on these events. In this case, instruction
-/// erasure will cancel any future visits to the erased instruction and
-/// instruction creation will schedule that instruction for a future visit.
-/// Other Combiner implementations may require more complex behaviour from
-/// their GISelChangeObserver subclass.
+/// observer subclass will act on these events.
 class Combiner::WorkListMaintainer : public GISelChangeObserver {
-  using WorkListTy = GISelWorkList<512>;
-  WorkListTy &WorkList;
+protected:
+#ifndef NDEBUG
   /// The instructions that have been created but we want to report once they
   /// have their operands. This is only maintained if debug output is requested.
-#ifndef NDEBUG
-  SetVector<const MachineInstr *> CreatedInstrs;
+  SmallSetVector<const MachineInstr *, 32> CreatedInstrs;
 #endif
+  using Level = CombinerInfo::ObserverLevel;
 
 public:
-  WorkListMaintainer(WorkListTy &WorkList) : WorkList(WorkList) {}
+  static std::unique_ptr<WorkListMaintainer>
+  create(Level Lvl, WorkListTy &WorkList, MachineRegisterInfo &MRI);
+
   virtual ~WorkListMaintainer() = default;
 
+  void reportFullyCreatedInstrs() {
+    LLVM_DEBUG({
+      for (auto *MI : CreatedInstrs) {
+        dbgs() << "Created: " << *MI;
+      }
+      CreatedInstrs.clear();
+    });
+  }
+
+  virtual void reset() = 0;
+  virtual void appliedCombine() = 0;
+};
+
+/// A configurable WorkListMaintainer implementation.
+/// The ObserverLevel determines how the WorkListMaintainer reacts to MIR
+/// changes.
+template <CombinerInfo::ObserverLevel Lvl>
+class Combiner::WorkListMaintainerImpl : public Combiner::WorkListMaintainer {
+  WorkListTy &WorkList;
+  MachineRegisterInfo &MRI;
+
+  // Defer handling these instructions until the combine finishes.
+  SmallSetVector<MachineInstr *, 32> DeferList;
+
+  // Track VRegs that (might) have lost a use.
+  SmallSetVector<Register, 32> LostUses;
+
+public:
+  WorkListMaintainerImpl(WorkListTy &WorkList, MachineRegisterInfo &MRI)
+      : WorkList(WorkList), MRI(MRI) {}
+
+  virtual ~WorkListMaintainerImpl() = default;
+
+  void reset() override {
+    DeferList.clear();
+    LostUses.clear();
+  }
+
   void erasingInstr(MachineInstr &MI) override {
-    LLVM_DEBUG(dbgs() << "Erasing: " << MI << "\n");
+    // MI will become dangling, remove it from all lists.
+    LLVM_DEBUG(dbgs() << "Erasing: " << MI; CreatedInstrs.remove(&MI));
     WorkList.remove(&MI);
+    if constexpr (Lvl != Level::Basic) {
+      DeferList.remove(&MI);
+      noteLostUses(MI);
+    }
   }
+
   void createdInstr(MachineInstr &MI) override {
-    LLVM_DEBUG(dbgs() << "Creating: " << MI << "\n");
-    WorkList.insert(&MI);
-    LLVM_DEBUG(CreatedInstrs.insert(&MI));
+    LLVM_DEBUG(dbgs() << "Creating: " << MI; CreatedInstrs.insert(&MI));
+    if constexpr (Lvl == Level::Basic)
+      WorkList.insert(&MI);
+    else
+      // Defer handling newly created instructions, because they don't have
+      // operands yet. We also insert them into the WorkList in reverse
+      // order so that they will be combined top down.
+      DeferList.insert(&MI);
   }
+
   void changingInstr(MachineInstr &MI) override {
-    LLVM_DEBUG(dbgs() << "Changing: " << MI << "\n");
-    WorkList.insert(&MI);
+    LLVM_DEBUG(dbgs() << "Changing: " << MI);
+    // Some uses might get dropped when MI is changed.
+    // For now, overapproximate by assuming all uses will be dropped.
+    // TODO: Is a more precise heuristic or manual tracking of use count
+    // decrements worth it?
+    if constexpr (Lvl != Level::Basic)
+      noteLostUses(MI);
   }
+
   void changedInstr(MachineInstr &MI) override {
-    LLVM_DEBUG(dbgs() << "Changed: " << MI << "\n");
-    WorkList.insert(&MI);
+    LLVM_DEBUG(dbgs() << "Changed: " << MI);
+    if constexpr (Lvl == Level::Basic)
+      WorkList.insert(&MI);
+    else
+      // Defer this for DCE
+      DeferList.insert(&MI);
   }
 
-  void reportFullyCreatedInstrs() {
-    LLVM_DEBUG(for (const auto *MI
-                    : CreatedInstrs) {
-      dbgs() << "Created: ";
-      MI->print(dbgs());
-    });
-    LLVM_DEBUG(CreatedInstrs.clear());
+  // Only track changes during the combine and then walk the def/use-chains once
+  // the combine is finished, because:
+  // - instructions might have multiple defs during the combine.
+  // - use counts aren't accurate during the combine.
+  void appliedCombine() override {
+    if constexpr (Lvl == Level::Basic)
+      return;
+
+    // DCE deferred instructions and add them to the WorkList bottom up.
+    while (!DeferList.empty()) {
+      MachineInstr &MI = *DeferList.pop_back_val();
+      if (tryDCE(MI, MRI))
+        continue;
+
+      if constexpr (Lvl >= Level::SinglePass)
+        addUsersToWorkList(MI);
+
+      WorkList.insert(&MI);
+    }
+
+    // Handle instructions that have lost a user.
+    while (!LostUses.empty()) {
+      Register Use = LostUses.pop_back_val();
+      MachineInstr *UseMI = MRI.getVRegDef(Use);
+      if (!UseMI)
+        continue;
+
+      // If DCE succeeds, UseMI's uses are added back to LostUses by
+      // erasingInstr.
+      if (tryDCE(*UseMI, MRI))
+        continue;
+
+      if constexpr (Lvl >= Level::SinglePass) {
+        // OneUse checks are relatively common, so we might be able to combine
+        // the single remaining user of this Reg.
+        if (MRI.hasOneNonDBGUser(Use))
+          WorkList.insert(&*MRI.use_instr_nodbg_begin(Use));
+
+        WorkList.insert(UseMI);
+      }
+    }
+  }
+
+  void noteLostUses(MachineInstr &MI) {
+    for (auto &Use : MI.explicit_uses()) {
+      if (!Use.isReg() || !Use.getReg().isVirtual())
+        continue;
+      LostUses.insert(Use.getReg());
+    }
+  }
+
+  void addUsersToWorkList(MachineInstr &MI) {
+    for (auto &Def : MI.defs()) {
+      Register DefReg = Def.getReg();
+      if (!DefReg.isVirtual())
+        continue;
+      for (auto &UseMI : MRI.use_nodbg_instructions(DefReg)) {
+        WorkList.insert(&UseMI);
+      }
+    }
   }
 };
 
+std::unique_ptr<Combiner::WorkListMaintainer>
+Combiner::WorkListMaintainer::create(Level Lvl, WorkListTy &WorkList,
+                                     MachineRegisterInfo &MRI) {
+  switch (Lvl) {
+  case Level::Basic:
+    return std::make_unique<WorkListMaintainerImpl<Level::Basic>>(WorkList,
+                                                                  MRI);
+  case Level::DCE:
+    return std::make_unique<WorkListMaintainerImpl<Level::DCE>>(WorkList, MRI);
+  case Level::SinglePass:
+    return std::make_unique<WorkListMaintainerImpl<Level::SinglePass>>(WorkList,
+                                                                       MRI);
+  }
+  llvm_unreachable("Illegal ObserverLevel");
+}
+
 Combiner::Combiner(MachineFunction &MF, CombinerInfo &CInfo,
                    const TargetPassConfig *TPC, GISelKnownBits *KB,
                    GISelCSEInfo *CSEInfo)
     : Builder(CSEInfo ? std::make_unique<CSEMIRBuilder>()
                       : std::make_unique<MachineIRBuilder>()),
-      WLObserver(std::make_unique<WorkListMaintainer>(WorkList)),
+      WLObserver(WorkListMaintainer::create(CInfo.ObserverLvl, WorkList,
+                                            MF.getRegInfo())),
       ObserverWrapper(std::make_unique<GISelObserverWrapper>()), CInfo(CInfo),
       Observer(*ObserverWrapper), B(*Builder), MF(MF), MRI(MF.getRegInfo()),
       KB(KB), TPC(TPC), CSEInfo(CSEInfo) {
@@ -115,6 +243,15 @@ Combiner::Combiner(MachineFunction &MF, CombinerInfo &CInfo,
 
 Combiner::~Combiner() = default;
 
+bool Combiner::tryDCE(MachineInstr &MI, MachineRegisterInfo &MRI) {
+  if (!isTriviallyDead(MI, MRI))
+    return false;
+  LLVM_DEBUG(dbgs() << "Dead: " << MI);
+  llvm::salvageDebugInfo(MRI, MI);
+  MI.eraseFromParent();
+  return true;
+}
+
 bool Combiner::combineMachineInstrs() {
   // If the ISel pipeline failed, do not bother running this pass.
   // FIXME: Should this be here or in individual combiner passes.
@@ -141,27 +278,29 @@ bool Combiner::combineMachineInstrs() {
     ++Iteration;
     LLVM_DEBUG(dbgs() << "\n\nCombiner iteration #" << Iteration << '\n');
 
+    Changed = false;
     WorkList.clear();
+    WLObserver->reset();
     ObserverWrapper->clearObservers();
     if (CSEInfo)
       ObserverWrapper->addObserver(CSEInfo);
 
+    // If Observer-based DCE is enabled, perform full DCE only before the first
+    // iteration.
+    bool EnableDCE = CInfo.ObserverLvl >= CombinerInfo::ObserverLevel::DCE
+                         ? CInfo.EnableFullDCE && Iteration == 1
+                         : CInfo.EnableFullDCE;
+
     // Collect all instructions. Do a post order traversal for basic blocks and
     // insert with list bottom up, so while we pop_back_val, we'll traverse top
     // down RPOT.
-    Changed = false;
-
     RAIIMFObsDelInstaller DelInstall(MF, *ObserverWrapper);
     for (MachineBasicBlock *MBB : post_order(&MF)) {
       for (MachineInstr &CurMI :
            llvm::make_early_inc_range(llvm::reverse(*MBB))) {
         // Erase dead insts before even adding to the list.
-        if (isTriviallyDead(CurMI, MRI)) {
-          LLVM_DEBUG(dbgs() << CurMI << "Is dead; erasing.\n");
-          llvm::salvageDebugInfo(MRI, CurMI);
-          CurMI.eraseFromParent();
+        if (EnableDCE && tryDCE(CurMI, MRI))
           continue;
-        }
         WorkList.deferred_insert(&CurMI);
       }
     }
@@ -171,10 +310,13 @@ bool Combiner::combineMachineInstrs() {
     ObserverWrapper->addObserver(WLObserver.get());
     // Main Loop. Process the instructions here.
     while (!WorkList.empty()) {
-      MachineInstr *CurrInst = WorkList.pop_back_val();
-      LLVM_DEBUG(dbgs() << "\nTry combining " << *CurrInst;);
-      Changed |= tryCombineAll(*CurrInst);
-      WLObserver->reportFullyCreatedInstrs();
+      MachineInstr &CurrInst = *WorkList.pop_back_val();
+      LLVM_DEBUG(dbgs() << "\nTry combining " << CurrInst);
+      bool AppliedCombine = tryCombineAll(CurrInst);
+      LLVM_DEBUG(WLObserver->reportFullyCreatedInstrs());
+      Changed |= AppliedCombine;
+      if (AppliedCombine)
+        WLObserver->appliedCombine();
     }
     MFChanged |= Changed;
 

``````````

</details>


https://github.com/llvm/llvm-project/pull/102163


More information about the llvm-branch-commits mailing list