[llvm] PeepholeOpt: Stop allocating tiny helper classes (NFC) (PR #123936)

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 22 17:57:32 PST 2025


https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/123936

>From c4c0592d2e82c52668d5d1181a9109629bd0884e Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Wed, 22 Jan 2025 19:15:29 +0700
Subject: [PATCH] PeepholeOpt: Stop allocating tiny helper classes

This was allocating tiny helper classes for every instruction
visited. We can just dispatch over the cases in the visitor
function instead.
---
 llvm/lib/CodeGen/PeepholeOptimizer.cpp | 1359 ++++++++++++------------
 1 file changed, 675 insertions(+), 684 deletions(-)

diff --git a/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/llvm/lib/CodeGen/PeepholeOptimizer.cpp
index 5fc8f419e80a5d..aec4aaa81761c7 100644
--- a/llvm/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/llvm/lib/CodeGen/PeepholeOptimizer.cpp
@@ -149,288 +149,584 @@ namespace {
 class ValueTrackerResult;
 class RecurrenceInstr;
 
-class PeepholeOptimizer : private MachineFunction::Delegate {
-  const TargetInstrInfo *TII = nullptr;
-  const TargetRegisterInfo *TRI = nullptr;
-  MachineRegisterInfo *MRI = nullptr;
-  MachineDominatorTree *DT = nullptr; // Machine dominator tree
-  MachineLoopInfo *MLI = nullptr;
-
+/// Interface to query instructions amenable to copy rewriting.
+class Rewriter {
+protected:
+  MachineInstr &CopyLike;
+  unsigned CurrentSrcIdx = 0; ///< The index of the source being rewritten.
 public:
-  PeepholeOptimizer(MachineDominatorTree *DT, MachineLoopInfo *MLI)
-      : DT(DT), MLI(MLI) {}
-
-  bool run(MachineFunction &MF);
-  /// Track Def -> Use info used for rewriting copies.
-  using RewriteMapTy = SmallDenseMap<RegSubRegPair, ValueTrackerResult>;
-
-  /// Sequence of instructions that formulate recurrence cycle.
-  using RecurrenceCycle = SmallVector<RecurrenceInstr, 4>;
-
-private:
-  bool optimizeCmpInstr(MachineInstr &MI);
-  bool optimizeExtInstr(MachineInstr &MI, MachineBasicBlock &MBB,
-                        SmallPtrSetImpl<MachineInstr *> &LocalMIs);
-  bool optimizeSelect(MachineInstr &MI,
-                      SmallPtrSetImpl<MachineInstr *> &LocalMIs);
-  bool optimizeCondBranch(MachineInstr &MI);
-  bool optimizeCoalescableCopy(MachineInstr &MI);
-  bool optimizeUncoalescableCopy(MachineInstr &MI,
-                                 SmallPtrSetImpl<MachineInstr *> &LocalMIs);
-  bool optimizeRecurrence(MachineInstr &PHI);
-  bool findNextSource(RegSubRegPair RegSubReg, RewriteMapTy &RewriteMap);
-  bool isMoveImmediate(MachineInstr &MI, SmallSet<Register, 4> &ImmDefRegs,
-                       DenseMap<Register, MachineInstr *> &ImmDefMIs);
-  bool foldImmediate(MachineInstr &MI, SmallSet<Register, 4> &ImmDefRegs,
-                     DenseMap<Register, MachineInstr *> &ImmDefMIs,
-                     bool &Deleted);
-
-  /// Finds recurrence cycles, but only ones that formulated around
-  /// a def operand and a use operand that are tied. If there is a use
-  /// operand commutable with the tied use operand, find recurrence cycle
-  /// along that operand as well.
-  bool findTargetRecurrence(Register Reg,
-                            const SmallSet<Register, 2> &TargetReg,
-                            RecurrenceCycle &RC);
-
-  /// If copy instruction \p MI is a virtual register copy or a copy of a
-  /// constant physical register to a virtual register, track it in the
-  /// set CopySrcMIs. If this virtual register was previously seen as a
-  /// copy, replace the uses of this copy with the previously seen copy's
-  /// destination register.
-  bool foldRedundantCopy(MachineInstr &MI);
-
-  /// Is the register \p Reg a non-allocatable physical register?
-  bool isNAPhysCopy(Register Reg);
+  Rewriter(MachineInstr &CopyLike) : CopyLike(CopyLike) {}
+  virtual ~Rewriter() = default;
 
-  /// If copy instruction \p MI is a non-allocatable virtual<->physical
-  /// register copy, track it in the \p NAPhysToVirtMIs map. If this
-  /// non-allocatable physical register was previously copied to a virtual
-  /// registered and hasn't been clobbered, the virt->phys copy can be
-  /// deleted.
-  bool
-  foldRedundantNAPhysCopy(MachineInstr &MI,
-                          DenseMap<Register, MachineInstr *> &NAPhysToVirtMIs);
+  /// Get the next rewritable source (SrcReg, SrcSubReg) and
+  /// the related value that it affects (DstReg, DstSubReg).
+  /// A source is considered rewritable if its register class and the
+  /// register class of the related DstReg may not be register
+  /// coalescer friendly. In other words, given a copy-like instruction
+  /// not all the arguments may be returned at rewritable source, since
+  /// some arguments are none to be register coalescer friendly.
+  ///
+  /// Each call of this method moves the current source to the next
+  /// rewritable source.
+  /// For instance, let CopyLike be the instruction to rewrite.
+  /// CopyLike has one definition and one source:
+  /// dst.dstSubIdx = CopyLike src.srcSubIdx.
+  ///
+  /// The first call will give the first rewritable source, i.e.,
+  /// the only source this instruction has:
+  /// (SrcReg, SrcSubReg) = (src, srcSubIdx).
+  /// This source defines the whole definition, i.e.,
+  /// (DstReg, DstSubReg) = (dst, dstSubIdx).
+  ///
+  /// The second and subsequent calls will return false, as there is only one
+  /// rewritable source.
+  ///
+  /// \return True if a rewritable source has been found, false otherwise.
+  /// The output arguments are valid if and only if true is returned.
+  virtual bool getNextRewritableSource(RegSubRegPair &Src,
+                                       RegSubRegPair &Dst) = 0;
 
-  bool isLoadFoldable(MachineInstr &MI,
-                      SmallSet<Register, 16> &FoldAsLoadDefCandidates);
+  /// Rewrite the current source with \p NewReg and \p NewSubReg if possible.
+  /// \return True if the rewriting was possible, false otherwise.
+  virtual bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) = 0;
+};
 
-  /// Check whether \p MI is understood by the register coalescer
-  /// but may require some rewriting.
-  bool isCoalescableCopy(const MachineInstr &MI) {
-    // SubregToRegs are not interesting, because they are already register
-    // coalescer friendly.
-    return MI.isCopy() ||
-           (!DisableAdvCopyOpt && (MI.isRegSequence() || MI.isInsertSubreg() ||
-                                   MI.isExtractSubreg()));
+/// Rewriter for COPY instructions.
+class CopyRewriter : public Rewriter {
+public:
+  CopyRewriter(MachineInstr &MI) : Rewriter(MI) {
+    assert(MI.isCopy() && "Expected copy instruction");
   }
+  virtual ~CopyRewriter() = default;
 
-  /// Check whether \p MI is a copy like instruction that is
-  /// not recognized by the register coalescer.
-  bool isUncoalescableCopy(const MachineInstr &MI) {
-    return MI.isBitcast() || (!DisableAdvCopyOpt && (MI.isRegSequenceLike() ||
-                                                     MI.isInsertSubregLike() ||
-                                                     MI.isExtractSubregLike()));
+  bool getNextRewritableSource(RegSubRegPair &Src,
+                               RegSubRegPair &Dst) override {
+    // CurrentSrcIdx > 0 means this function has already been called.
+    if (CurrentSrcIdx > 0)
+      return false;
+    // This is the first call to getNextRewritableSource.
+    // Move the CurrentSrcIdx to remember that we made that call.
+    CurrentSrcIdx = 1;
+    // The rewritable source is the argument.
+    const MachineOperand &MOSrc = CopyLike.getOperand(1);
+    Src = RegSubRegPair(MOSrc.getReg(), MOSrc.getSubReg());
+    // What we track are the alternative sources of the definition.
+    const MachineOperand &MODef = CopyLike.getOperand(0);
+    Dst = RegSubRegPair(MODef.getReg(), MODef.getSubReg());
+    return true;
   }
 
-  MachineInstr &rewriteSource(MachineInstr &CopyLike, RegSubRegPair Def,
-                              RewriteMapTy &RewriteMap);
-
-  // Set of copies to virtual registers keyed by source register.  Never
-  // holds any physreg which requires def tracking.
-  DenseMap<RegSubRegPair, MachineInstr *> CopySrcMIs;
+  bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override {
+    if (CurrentSrcIdx != 1)
+      return false;
+    MachineOperand &MOSrc = CopyLike.getOperand(CurrentSrcIdx);
+    MOSrc.setReg(NewReg);
+    MOSrc.setSubReg(NewSubReg);
+    return true;
+  }
+};
 
-  // MachineFunction::Delegate implementation. Used to maintain CopySrcMIs.
-  void MF_HandleInsertion(MachineInstr &MI) override { return; }
+/// Helper class to rewrite uncoalescable copy like instructions
+/// into new COPY (coalescable friendly) instructions.
+class UncoalescableRewriter : public Rewriter {
+  unsigned NumDefs; ///< Number of defs in the bitcast.
 
-  bool getCopySrc(MachineInstr &MI, RegSubRegPair &SrcPair) {
-    if (!MI.isCopy())
-      return false;
+public:
+  UncoalescableRewriter(MachineInstr &MI) : Rewriter(MI) {
+    NumDefs = MI.getDesc().getNumDefs();
+  }
 
-    Register SrcReg = MI.getOperand(1).getReg();
-    unsigned SrcSubReg = MI.getOperand(1).getSubReg();
-    if (!SrcReg.isVirtual() && !MRI->isConstantPhysReg(SrcReg))
+  /// \see See Rewriter::getNextRewritableSource()
+  /// All such sources need to be considered rewritable in order to
+  /// rewrite a uncoalescable copy-like instruction. This method return
+  /// each definition that must be checked if rewritable.
+  bool getNextRewritableSource(RegSubRegPair &Src,
+                               RegSubRegPair &Dst) override {
+    // Find the next non-dead definition and continue from there.
+    if (CurrentSrcIdx == NumDefs)
       return false;
 
-    SrcPair = RegSubRegPair(SrcReg, SrcSubReg);
-    return true;
-  }
+    while (CopyLike.getOperand(CurrentSrcIdx).isDead()) {
+      ++CurrentSrcIdx;
+      if (CurrentSrcIdx == NumDefs)
+        return false;
+    }
 
-  // If a COPY instruction is to be deleted or changed, we should also remove
-  // it from CopySrcMIs.
-  void deleteChangedCopy(MachineInstr &MI) {
-    RegSubRegPair SrcPair;
-    if (!getCopySrc(MI, SrcPair))
-      return;
+    // What we track are the alternative sources of the definition.
+    Src = RegSubRegPair(0, 0);
+    const MachineOperand &MODef = CopyLike.getOperand(CurrentSrcIdx);
+    Dst = RegSubRegPair(MODef.getReg(), MODef.getSubReg());
 
-    auto It = CopySrcMIs.find(SrcPair);
-    if (It != CopySrcMIs.end() && It->second == &MI)
-      CopySrcMIs.erase(It);
+    CurrentSrcIdx++;
+    return true;
   }
 
-  void MF_HandleRemoval(MachineInstr &MI) override { deleteChangedCopy(MI); }
-
-  void MF_HandleChangeDesc(MachineInstr &MI, const MCInstrDesc &TID) override {
-    deleteChangedCopy(MI);
+  bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override {
+    return false;
   }
 };
 
-class PeepholeOptimizerLegacy : public MachineFunctionPass {
+/// Specialized rewriter for INSERT_SUBREG instruction.
+class InsertSubregRewriter : public Rewriter {
 public:
-  static char ID; // Pass identification
-
-  PeepholeOptimizerLegacy() : MachineFunctionPass(ID) {
-    initializePeepholeOptimizerLegacyPass(*PassRegistry::getPassRegistry());
+  InsertSubregRewriter(MachineInstr &MI) : Rewriter(MI) {
+    assert(MI.isInsertSubreg() && "Invalid instruction");
   }
 
-  bool runOnMachineFunction(MachineFunction &MF) override;
+  /// \see See Rewriter::getNextRewritableSource()
+  /// Here CopyLike has the following form:
+  /// dst = INSERT_SUBREG Src1, Src2.src2SubIdx, subIdx.
+  /// Src1 has the same register class has dst, hence, there is
+  /// nothing to rewrite.
+  /// Src2.src2SubIdx, may not be register coalescer friendly.
+  /// Therefore, the first call to this method returns:
+  /// (SrcReg, SrcSubReg) = (Src2, src2SubIdx).
+  /// (DstReg, DstSubReg) = (dst, subIdx).
+  ///
+  /// Subsequence calls will return false.
+  bool getNextRewritableSource(RegSubRegPair &Src,
+                               RegSubRegPair &Dst) override {
+    // If we already get the only source we can rewrite, return false.
+    if (CurrentSrcIdx == 2)
+      return false;
+    // We are looking at v2 = INSERT_SUBREG v0, v1, sub0.
+    CurrentSrcIdx = 2;
+    const MachineOperand &MOInsertedReg = CopyLike.getOperand(2);
+    Src = RegSubRegPair(MOInsertedReg.getReg(), MOInsertedReg.getSubReg());
+    const MachineOperand &MODef = CopyLike.getOperand(0);
 
-  void getAnalysisUsage(AnalysisUsage &AU) const override {
-    AU.setPreservesCFG();
-    MachineFunctionPass::getAnalysisUsage(AU);
-    AU.addRequired<MachineLoopInfoWrapperPass>();
-    AU.addPreserved<MachineLoopInfoWrapperPass>();
-    if (Aggressive) {
-      AU.addRequired<MachineDominatorTreeWrapperPass>();
-      AU.addPreserved<MachineDominatorTreeWrapperPass>();
-    }
+    // We want to track something that is compatible with the
+    // partial definition.
+    if (MODef.getSubReg())
+      // Bail if we have to compose sub-register indices.
+      return false;
+    Dst = RegSubRegPair(MODef.getReg(),
+                        (unsigned)CopyLike.getOperand(3).getImm());
+    return true;
   }
 
-  MachineFunctionProperties getRequiredProperties() const override {
-    return MachineFunctionProperties().set(
-        MachineFunctionProperties::Property::IsSSA);
-  }
-};
-
-/// Helper class to hold instructions that are inside recurrence cycles.
-/// The recurrence cycle is formulated around 1) a def operand and its
-/// tied use operand, or 2) a def operand and a use operand that is commutable
-/// with another use operand which is tied to the def operand. In the latter
-/// case, index of the tied use operand and the commutable use operand are
-/// maintained with CommutePair.
-class RecurrenceInstr {
-public:
-  using IndexPair = std::pair<unsigned, unsigned>;
-
-  RecurrenceInstr(MachineInstr *MI) : MI(MI) {}
-  RecurrenceInstr(MachineInstr *MI, unsigned Idx1, unsigned Idx2)
-      : MI(MI), CommutePair(std::make_pair(Idx1, Idx2)) {}
-
-  MachineInstr *getMI() const { return MI; }
-  std::optional<IndexPair> getCommutePair() const { return CommutePair; }
-
-private:
-  MachineInstr *MI;
-  std::optional<IndexPair> CommutePair;
+  bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override {
+    if (CurrentSrcIdx != 2)
+      return false;
+    // We are rewriting the inserted reg.
+    MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx);
+    MO.setReg(NewReg);
+    MO.setSubReg(NewSubReg);
+    return true;
+  }
 };
 
-/// Helper class to hold a reply for ValueTracker queries.
-/// Contains the returned sources for a given search and the instructions
-/// where the sources were tracked from.
-class ValueTrackerResult {
-private:
-  /// Track all sources found by one ValueTracker query.
-  SmallVector<RegSubRegPair, 2> RegSrcs;
-
-  /// Instruction using the sources in 'RegSrcs'.
-  const MachineInstr *Inst = nullptr;
+/// Specialized rewriter for EXTRACT_SUBREG instruction.
+class ExtractSubregRewriter : public Rewriter {
+  const TargetInstrInfo &TII;
 
 public:
-  ValueTrackerResult() = default;
-
-  ValueTrackerResult(Register Reg, unsigned SubReg) { addSource(Reg, SubReg); }
+  ExtractSubregRewriter(MachineInstr &MI, const TargetInstrInfo &TII)
+      : Rewriter(MI), TII(TII) {
+    assert(MI.isExtractSubreg() && "Invalid instruction");
+  }
 
-  bool isValid() const { return getNumSources() > 0; }
+  /// \see Rewriter::getNextRewritableSource()
+  /// Here CopyLike has the following form:
+  /// dst.dstSubIdx = EXTRACT_SUBREG Src, subIdx.
+  /// There is only one rewritable source: Src.subIdx,
+  /// which defines dst.dstSubIdx.
+  bool getNextRewritableSource(RegSubRegPair &Src,
+                               RegSubRegPair &Dst) override {
+    // If we already get the only source we can rewrite, return false.
+    if (CurrentSrcIdx == 1)
+      return false;
+    // We are looking at v1 = EXTRACT_SUBREG v0, sub0.
+    CurrentSrcIdx = 1;
+    const MachineOperand &MOExtractedReg = CopyLike.getOperand(1);
+    // If we have to compose sub-register indices, bail out.
+    if (MOExtractedReg.getSubReg())
+      return false;
 
-  void setInst(const MachineInstr *I) { Inst = I; }
-  const MachineInstr *getInst() const { return Inst; }
+    Src =
+        RegSubRegPair(MOExtractedReg.getReg(), CopyLike.getOperand(2).getImm());
 
-  void clear() {
-    RegSrcs.clear();
-    Inst = nullptr;
+    // We want to track something that is compatible with the definition.
+    const MachineOperand &MODef = CopyLike.getOperand(0);
+    Dst = RegSubRegPair(MODef.getReg(), MODef.getSubReg());
+    return true;
   }
 
-  void addSource(Register SrcReg, unsigned SrcSubReg) {
-    RegSrcs.push_back(RegSubRegPair(SrcReg, SrcSubReg));
+  bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override {
+    // The only source we can rewrite is the input register.
+    if (CurrentSrcIdx != 1)
+      return false;
+
+    CopyLike.getOperand(CurrentSrcIdx).setReg(NewReg);
+
+    // If we find a source that does not require to extract something,
+    // rewrite the operation with a copy.
+    if (!NewSubReg) {
+      // Move the current index to an invalid position.
+      // We do not want another call to this method to be able
+      // to do any change.
+      CurrentSrcIdx = -1;
+      // Rewrite the operation as a COPY.
+      // Get rid of the sub-register index.
+      CopyLike.removeOperand(2);
+      // Morph the operation into a COPY.
+      CopyLike.setDesc(TII.get(TargetOpcode::COPY));
+      return true;
+    }
+    CopyLike.getOperand(CurrentSrcIdx + 1).setImm(NewSubReg);
+    return true;
   }
+};
 
-  void setSource(int Idx, Register SrcReg, unsigned SrcSubReg) {
-    assert(Idx < getNumSources() && "Reg pair source out of index");
-    RegSrcs[Idx] = RegSubRegPair(SrcReg, SrcSubReg);
+/// Specialized rewriter for REG_SEQUENCE instruction.
+class RegSequenceRewriter : public Rewriter {
+public:
+  RegSequenceRewriter(MachineInstr &MI) : Rewriter(MI) {
+    assert(MI.isRegSequence() && "Invalid instruction");
   }
 
-  int getNumSources() const { return RegSrcs.size(); }
+  /// \see Rewriter::getNextRewritableSource()
+  /// Here CopyLike has the following form:
+  /// dst = REG_SEQUENCE Src1.src1SubIdx, subIdx1, Src2.src2SubIdx, subIdx2.
+  /// Each call will return a different source, walking all the available
+  /// source.
+  ///
+  /// The first call returns:
+  /// (SrcReg, SrcSubReg) = (Src1, src1SubIdx).
+  /// (DstReg, DstSubReg) = (dst, subIdx1).
+  ///
+  /// The second call returns:
+  /// (SrcReg, SrcSubReg) = (Src2, src2SubIdx).
+  /// (DstReg, DstSubReg) = (dst, subIdx2).
+  ///
+  /// And so on, until all the sources have been traversed, then
+  /// it returns false.
+  bool getNextRewritableSource(RegSubRegPair &Src,
+                               RegSubRegPair &Dst) override {
+    // We are looking at v0 = REG_SEQUENCE v1, sub1, v2, sub2, etc.
 
-  RegSubRegPair getSrc(int Idx) const { return RegSrcs[Idx]; }
+    // If this is the first call, move to the first argument.
+    if (CurrentSrcIdx == 0) {
+      CurrentSrcIdx = 1;
+    } else {
+      // Otherwise, move to the next argument and check that it is valid.
+      CurrentSrcIdx += 2;
+      if (CurrentSrcIdx >= CopyLike.getNumOperands())
+        return false;
+    }
+    const MachineOperand &MOInsertedReg = CopyLike.getOperand(CurrentSrcIdx);
+    Src.Reg = MOInsertedReg.getReg();
+    // If we have to compose sub-register indices, bail out.
+    if ((Src.SubReg = MOInsertedReg.getSubReg()))
+      return false;
 
-  Register getSrcReg(int Idx) const {
-    assert(Idx < getNumSources() && "Reg source out of index");
-    return RegSrcs[Idx].Reg;
-  }
+    // We want to track something that is compatible with the related
+    // partial definition.
+    Dst.SubReg = CopyLike.getOperand(CurrentSrcIdx + 1).getImm();
 
-  unsigned getSrcSubReg(int Idx) const {
-    assert(Idx < getNumSources() && "SubReg source out of index");
-    return RegSrcs[Idx].SubReg;
+    const MachineOperand &MODef = CopyLike.getOperand(0);
+    Dst.Reg = MODef.getReg();
+    // If we have to compose sub-registers, bail.
+    return MODef.getSubReg() == 0;
   }
 
-  bool operator==(const ValueTrackerResult &Other) const {
-    if (Other.getInst() != getInst())
-      return false;
-
-    if (Other.getNumSources() != getNumSources())
+  bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override {
+    // We cannot rewrite out of bound operands.
+    // Moreover, rewritable sources are at odd positions.
+    if ((CurrentSrcIdx & 1) != 1 || CurrentSrcIdx > CopyLike.getNumOperands())
       return false;
 
-    for (int i = 0, e = Other.getNumSources(); i != e; ++i)
-      if (Other.getSrcReg(i) != getSrcReg(i) ||
-          Other.getSrcSubReg(i) != getSrcSubReg(i))
-        return false;
+    MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx);
+    MO.setReg(NewReg);
+    MO.setSubReg(NewSubReg);
     return true;
   }
 };
 
-/// Helper class to track the possible sources of a value defined by
-/// a (chain of) copy related instructions.
-/// Given a definition (instruction and definition index), this class
-/// follows the use-def chain to find successive suitable sources.
-/// The given source can be used to rewrite the definition into
-/// def = COPY src.
-///
-/// For instance, let us consider the following snippet:
-/// v0 =
-/// v2 = INSERT_SUBREG v1, v0, sub0
-/// def = COPY v2.sub0
-///
-/// Using a ValueTracker for def = COPY v2.sub0 will give the following
-/// suitable sources:
-/// v2.sub0 and v0.
-/// Then, def can be rewritten into def = COPY v0.
-class ValueTracker {
-private:
-  /// The current point into the use-def chain.
-  const MachineInstr *Def = nullptr;
+class PeepholeOptimizer : private MachineFunction::Delegate {
+  const TargetInstrInfo *TII = nullptr;
+  const TargetRegisterInfo *TRI = nullptr;
+  MachineRegisterInfo *MRI = nullptr;
+  MachineDominatorTree *DT = nullptr; // Machine dominator tree
+  MachineLoopInfo *MLI = nullptr;
 
-  /// The index of the definition in Def.
-  unsigned DefIdx = 0;
+public:
+  PeepholeOptimizer(MachineDominatorTree *DT, MachineLoopInfo *MLI)
+      : DT(DT), MLI(MLI) {}
 
-  /// The sub register index of the definition.
-  unsigned DefSubReg;
+  bool run(MachineFunction &MF);
+  /// Track Def -> Use info used for rewriting copies.
+  using RewriteMapTy = SmallDenseMap<RegSubRegPair, ValueTrackerResult>;
 
-  /// The register where the value can be found.
-  Register Reg;
+  /// Sequence of instructions that formulate recurrence cycle.
+  using RecurrenceCycle = SmallVector<RecurrenceInstr, 4>;
 
-  /// MachineRegisterInfo used to perform tracking.
-  const MachineRegisterInfo &MRI;
+private:
+  bool optimizeCmpInstr(MachineInstr &MI);
+  bool optimizeExtInstr(MachineInstr &MI, MachineBasicBlock &MBB,
+                        SmallPtrSetImpl<MachineInstr *> &LocalMIs);
+  bool optimizeSelect(MachineInstr &MI,
+                      SmallPtrSetImpl<MachineInstr *> &LocalMIs);
+  bool optimizeCondBranch(MachineInstr &MI);
 
-  /// Optional TargetInstrInfo used to perform some complex tracking.
-  const TargetInstrInfo *TII;
+  bool optimizeCoalescableCopyImpl(Rewriter &&CpyRewriter);
+  bool optimizeCoalescableCopy(MachineInstr &MI);
+  bool optimizeUncoalescableCopy(MachineInstr &MI,
+                                 SmallPtrSetImpl<MachineInstr *> &LocalMIs);
+  bool optimizeRecurrence(MachineInstr &PHI);
+  bool findNextSource(RegSubRegPair RegSubReg, RewriteMapTy &RewriteMap);
+  bool isMoveImmediate(MachineInstr &MI, SmallSet<Register, 4> &ImmDefRegs,
+                       DenseMap<Register, MachineInstr *> &ImmDefMIs);
+  bool foldImmediate(MachineInstr &MI, SmallSet<Register, 4> &ImmDefRegs,
+                     DenseMap<Register, MachineInstr *> &ImmDefMIs,
+                     bool &Deleted);
 
-  /// Dispatcher to the right underlying implementation of getNextSource.
-  ValueTrackerResult getNextSourceImpl();
+  /// Finds recurrence cycles, but only ones that formulated around
+  /// a def operand and a use operand that are tied. If there is a use
+  /// operand commutable with the tied use operand, find recurrence cycle
+  /// along that operand as well.
+  bool findTargetRecurrence(Register Reg,
+                            const SmallSet<Register, 2> &TargetReg,
+                            RecurrenceCycle &RC);
 
-  /// Specialized version of getNextSource for Copy instructions.
-  ValueTrackerResult getNextSourceFromCopy();
+  /// If copy instruction \p MI is a virtual register copy or a copy of a
+  /// constant physical register to a virtual register, track it in the
+  /// set CopySrcMIs. If this virtual register was previously seen as a
+  /// copy, replace the uses of this copy with the previously seen copy's
+  /// destination register.
+  bool foldRedundantCopy(MachineInstr &MI);
 
-  /// Specialized version of getNextSource for Bitcast instructions.
+  /// Is the register \p Reg a non-allocatable physical register?
+  bool isNAPhysCopy(Register Reg);
+
+  /// If copy instruction \p MI is a non-allocatable virtual<->physical
+  /// register copy, track it in the \p NAPhysToVirtMIs map. If this
+  /// non-allocatable physical register was previously copied to a virtual
+  /// registered and hasn't been clobbered, the virt->phys copy can be
+  /// deleted.
+  bool
+  foldRedundantNAPhysCopy(MachineInstr &MI,
+                          DenseMap<Register, MachineInstr *> &NAPhysToVirtMIs);
+
+  bool isLoadFoldable(MachineInstr &MI,
+                      SmallSet<Register, 16> &FoldAsLoadDefCandidates);
+
+  /// Check whether \p MI is understood by the register coalescer
+  /// but may require some rewriting.
+  bool isCoalescableCopy(const MachineInstr &MI) {
+    // SubregToRegs are not interesting, because they are already register
+    // coalescer friendly.
+    return MI.isCopy() ||
+           (!DisableAdvCopyOpt && (MI.isRegSequence() || MI.isInsertSubreg() ||
+                                   MI.isExtractSubreg()));
+  }
+
+  /// Check whether \p MI is a copy like instruction that is
+  /// not recognized by the register coalescer.
+  bool isUncoalescableCopy(const MachineInstr &MI) {
+    return MI.isBitcast() || (!DisableAdvCopyOpt && (MI.isRegSequenceLike() ||
+                                                     MI.isInsertSubregLike() ||
+                                                     MI.isExtractSubregLike()));
+  }
+
+  MachineInstr &rewriteSource(MachineInstr &CopyLike, RegSubRegPair Def,
+                              RewriteMapTy &RewriteMap);
+
+  // Set of copies to virtual registers keyed by source register.  Never
+  // holds any physreg which requires def tracking.
+  DenseMap<RegSubRegPair, MachineInstr *> CopySrcMIs;
+
+  // MachineFunction::Delegate implementation. Used to maintain CopySrcMIs.
+  void MF_HandleInsertion(MachineInstr &MI) override { return; }
+
+  bool getCopySrc(MachineInstr &MI, RegSubRegPair &SrcPair) {
+    if (!MI.isCopy())
+      return false;
+
+    Register SrcReg = MI.getOperand(1).getReg();
+    unsigned SrcSubReg = MI.getOperand(1).getSubReg();
+    if (!SrcReg.isVirtual() && !MRI->isConstantPhysReg(SrcReg))
+      return false;
+
+    SrcPair = RegSubRegPair(SrcReg, SrcSubReg);
+    return true;
+  }
+
+  // If a COPY instruction is to be deleted or changed, we should also remove
+  // it from CopySrcMIs.
+  void deleteChangedCopy(MachineInstr &MI) {
+    RegSubRegPair SrcPair;
+    if (!getCopySrc(MI, SrcPair))
+      return;
+
+    auto It = CopySrcMIs.find(SrcPair);
+    if (It != CopySrcMIs.end() && It->second == &MI)
+      CopySrcMIs.erase(It);
+  }
+
+  void MF_HandleRemoval(MachineInstr &MI) override { deleteChangedCopy(MI); }
+
+  void MF_HandleChangeDesc(MachineInstr &MI, const MCInstrDesc &TID) override {
+    deleteChangedCopy(MI);
+  }
+};
+
+class PeepholeOptimizerLegacy : public MachineFunctionPass {
+public:
+  static char ID; // Pass identification
+
+  PeepholeOptimizerLegacy() : MachineFunctionPass(ID) {
+    initializePeepholeOptimizerLegacyPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    MachineFunctionPass::getAnalysisUsage(AU);
+    AU.addRequired<MachineLoopInfoWrapperPass>();
+    AU.addPreserved<MachineLoopInfoWrapperPass>();
+    if (Aggressive) {
+      AU.addRequired<MachineDominatorTreeWrapperPass>();
+      AU.addPreserved<MachineDominatorTreeWrapperPass>();
+    }
+  }
+
+  MachineFunctionProperties getRequiredProperties() const override {
+    return MachineFunctionProperties().set(
+        MachineFunctionProperties::Property::IsSSA);
+  }
+};
+
+/// Helper class to hold instructions that are inside recurrence cycles.
+/// The recurrence cycle is formulated around 1) a def operand and its
+/// tied use operand, or 2) a def operand and a use operand that is commutable
+/// with another use operand which is tied to the def operand. In the latter
+/// case, index of the tied use operand and the commutable use operand are
+/// maintained with CommutePair.
+class RecurrenceInstr {
+public:
+  using IndexPair = std::pair<unsigned, unsigned>;
+
+  RecurrenceInstr(MachineInstr *MI) : MI(MI) {}
+  RecurrenceInstr(MachineInstr *MI, unsigned Idx1, unsigned Idx2)
+      : MI(MI), CommutePair(std::make_pair(Idx1, Idx2)) {}
+
+  MachineInstr *getMI() const { return MI; }
+  std::optional<IndexPair> getCommutePair() const { return CommutePair; }
+
+private:
+  MachineInstr *MI;
+  std::optional<IndexPair> CommutePair;
+};
+
+/// Helper class to hold a reply for ValueTracker queries.
+/// Contains the returned sources for a given search and the instructions
+/// where the sources were tracked from.
+class ValueTrackerResult {
+private:
+  /// Track all sources found by one ValueTracker query.
+  SmallVector<RegSubRegPair, 2> RegSrcs;
+
+  /// Instruction using the sources in 'RegSrcs'.
+  const MachineInstr *Inst = nullptr;
+
+public:
+  ValueTrackerResult() = default;
+
+  ValueTrackerResult(Register Reg, unsigned SubReg) { addSource(Reg, SubReg); }
+
+  bool isValid() const { return getNumSources() > 0; }
+
+  void setInst(const MachineInstr *I) { Inst = I; }
+  const MachineInstr *getInst() const { return Inst; }
+
+  void clear() {
+    RegSrcs.clear();
+    Inst = nullptr;
+  }
+
+  void addSource(Register SrcReg, unsigned SrcSubReg) {
+    RegSrcs.push_back(RegSubRegPair(SrcReg, SrcSubReg));
+  }
+
+  void setSource(int Idx, Register SrcReg, unsigned SrcSubReg) {
+    assert(Idx < getNumSources() && "Reg pair source out of index");
+    RegSrcs[Idx] = RegSubRegPair(SrcReg, SrcSubReg);
+  }
+
+  int getNumSources() const { return RegSrcs.size(); }
+
+  RegSubRegPair getSrc(int Idx) const { return RegSrcs[Idx]; }
+
+  Register getSrcReg(int Idx) const {
+    assert(Idx < getNumSources() && "Reg source out of index");
+    return RegSrcs[Idx].Reg;
+  }
+
+  unsigned getSrcSubReg(int Idx) const {
+    assert(Idx < getNumSources() && "SubReg source out of index");
+    return RegSrcs[Idx].SubReg;
+  }
+
+  bool operator==(const ValueTrackerResult &Other) const {
+    if (Other.getInst() != getInst())
+      return false;
+
+    if (Other.getNumSources() != getNumSources())
+      return false;
+
+    for (int i = 0, e = Other.getNumSources(); i != e; ++i)
+      if (Other.getSrcReg(i) != getSrcReg(i) ||
+          Other.getSrcSubReg(i) != getSrcSubReg(i))
+        return false;
+    return true;
+  }
+};
+
+/// Helper class to track the possible sources of a value defined by
+/// a (chain of) copy related instructions.
+/// Given a definition (instruction and definition index), this class
+/// follows the use-def chain to find successive suitable sources.
+/// The given source can be used to rewrite the definition into
+/// def = COPY src.
+///
+/// For instance, let us consider the following snippet:
+/// v0 =
+/// v2 = INSERT_SUBREG v1, v0, sub0
+/// def = COPY v2.sub0
+///
+/// Using a ValueTracker for def = COPY v2.sub0 will give the following
+/// suitable sources:
+/// v2.sub0 and v0.
+/// Then, def can be rewritten into def = COPY v0.
+class ValueTracker {
+private:
+  /// The current point into the use-def chain.
+  const MachineInstr *Def = nullptr;
+
+  /// The index of the definition in Def.
+  unsigned DefIdx = 0;
+
+  /// The sub register index of the definition.
+  unsigned DefSubReg;
+
+  /// The register where the value can be found.
+  Register Reg;
+
+  /// MachineRegisterInfo used to perform tracking.
+  const MachineRegisterInfo &MRI;
+
+  /// Optional TargetInstrInfo used to perform some complex tracking.
+  const TargetInstrInfo *TII;
+
+  /// Dispatcher to the right underlying implementation of getNextSource.
+  ValueTrackerResult getNextSourceImpl();
+
+  /// Specialized version of getNextSource for Copy instructions.
+  ValueTrackerResult getNextSourceFromCopy();
+
+  /// Specialized version of getNextSource for Bitcast instructions.
   ValueTrackerResult getNextSourceFromBitcast();
 
   /// Specialized version of getNextSource for RegSequence instructions.
@@ -707,457 +1003,136 @@ bool PeepholeOptimizer::optimizeCondBranch(MachineInstr &MI) {
 /// Try to find the next source that share the same register file
 /// for the value defined by \p Reg and \p SubReg.
 /// When true is returned, the \p RewriteMap can be used by the client to
-/// retrieve all Def -> Use along the way up to the next source. Any found
-/// Use that is not itself a key for another entry, is the next source to
-/// use. During the search for the next source, multiple sources can be found
-/// given multiple incoming sources of a PHI instruction. In this case, we
-/// look in each PHI source for the next source; all found next sources must
-/// share the same register file as \p Reg and \p SubReg. The client should
-/// then be capable to rewrite all intermediate PHIs to get the next source.
-/// \return False if no alternative sources are available. True otherwise.
-bool PeepholeOptimizer::findNextSource(RegSubRegPair RegSubReg,
-                                       RewriteMapTy &RewriteMap) {
-  // Do not try to find a new source for a physical register.
-  // So far we do not have any motivating example for doing that.
-  // Thus, instead of maintaining untested code, we will revisit that if
-  // that changes at some point.
-  Register Reg = RegSubReg.Reg;
-  if (Reg.isPhysical())
-    return false;
-  const TargetRegisterClass *DefRC = MRI->getRegClass(Reg);
-
-  SmallVector<RegSubRegPair, 4> SrcToLook;
-  RegSubRegPair CurSrcPair = RegSubReg;
-  SrcToLook.push_back(CurSrcPair);
-
-  unsigned PHICount = 0;
-  do {
-    CurSrcPair = SrcToLook.pop_back_val();
-    // As explained above, do not handle physical registers
-    if (CurSrcPair.Reg.isPhysical())
-      return false;
-
-    ValueTracker ValTracker(CurSrcPair.Reg, CurSrcPair.SubReg, *MRI, TII);
-
-    // Follow the chain of copies until we find a more suitable source, a phi
-    // or have to abort.
-    while (true) {
-      ValueTrackerResult Res = ValTracker.getNextSource();
-      // Abort at the end of a chain (without finding a suitable source).
-      if (!Res.isValid())
-        return false;
-
-      // Insert the Def -> Use entry for the recently found source.
-      ValueTrackerResult CurSrcRes = RewriteMap.lookup(CurSrcPair);
-      if (CurSrcRes.isValid()) {
-        assert(CurSrcRes == Res && "ValueTrackerResult found must match");
-        // An existent entry with multiple sources is a PHI cycle we must avoid.
-        // Otherwise it's an entry with a valid next source we already found.
-        if (CurSrcRes.getNumSources() > 1) {
-          LLVM_DEBUG(dbgs()
-                     << "findNextSource: found PHI cycle, aborting...\n");
-          return false;
-        }
-        break;
-      }
-      RewriteMap.insert(std::make_pair(CurSrcPair, Res));
-
-      // ValueTrackerResult usually have one source unless it's the result from
-      // a PHI instruction. Add the found PHI edges to be looked up further.
-      unsigned NumSrcs = Res.getNumSources();
-      if (NumSrcs > 1) {
-        PHICount++;
-        if (PHICount >= RewritePHILimit) {
-          LLVM_DEBUG(dbgs() << "findNextSource: PHI limit reached\n");
-          return false;
-        }
-
-        for (unsigned i = 0; i < NumSrcs; ++i)
-          SrcToLook.push_back(Res.getSrc(i));
-        break;
-      }
-
-      CurSrcPair = Res.getSrc(0);
-      // Do not extend the live-ranges of physical registers as they add
-      // constraints to the register allocator. Moreover, if we want to extend
-      // the live-range of a physical register, unlike SSA virtual register,
-      // we will have to check that they aren't redefine before the related use.
-      if (CurSrcPair.Reg.isPhysical())
-        return false;
-
-      // Keep following the chain if the value isn't any better yet.
-      const TargetRegisterClass *SrcRC = MRI->getRegClass(CurSrcPair.Reg);
-      if (!TRI->shouldRewriteCopySrc(DefRC, RegSubReg.SubReg, SrcRC,
-                                     CurSrcPair.SubReg))
-        continue;
-
-      // We currently cannot deal with subreg operands on PHI instructions
-      // (see insertPHI()).
-      if (PHICount > 0 && CurSrcPair.SubReg != 0)
-        continue;
-
-      // We found a suitable source, and are done with this chain.
-      break;
-    }
-  } while (!SrcToLook.empty());
-
-  // If we did not find a more suitable source, there is nothing to optimize.
-  return CurSrcPair.Reg != Reg;
-}
-
-/// Insert a PHI instruction with incoming edges \p SrcRegs that are
-/// guaranteed to have the same register class. This is necessary whenever we
-/// successfully traverse a PHI instruction and find suitable sources coming
-/// from its edges. By inserting a new PHI, we provide a rewritten PHI def
-/// suitable to be used in a new COPY instruction.
-static MachineInstr &insertPHI(MachineRegisterInfo &MRI,
-                               const TargetInstrInfo &TII,
-                               const SmallVectorImpl<RegSubRegPair> &SrcRegs,
-                               MachineInstr &OrigPHI) {
-  assert(!SrcRegs.empty() && "No sources to create a PHI instruction?");
-
-  const TargetRegisterClass *NewRC = MRI.getRegClass(SrcRegs[0].Reg);
-  // NewRC is only correct if no subregisters are involved. findNextSource()
-  // should have rejected those cases already.
-  assert(SrcRegs[0].SubReg == 0 && "should not have subreg operand");
-  Register NewVR = MRI.createVirtualRegister(NewRC);
-  MachineBasicBlock *MBB = OrigPHI.getParent();
-  MachineInstrBuilder MIB = BuildMI(*MBB, &OrigPHI, OrigPHI.getDebugLoc(),
-                                    TII.get(TargetOpcode::PHI), NewVR);
-
-  unsigned MBBOpIdx = 2;
-  for (const RegSubRegPair &RegPair : SrcRegs) {
-    MIB.addReg(RegPair.Reg, 0, RegPair.SubReg);
-    MIB.addMBB(OrigPHI.getOperand(MBBOpIdx).getMBB());
-    // Since we're extended the lifetime of RegPair.Reg, clear the
-    // kill flags to account for that and make RegPair.Reg reaches
-    // the new PHI.
-    MRI.clearKillFlags(RegPair.Reg);
-    MBBOpIdx += 2;
-  }
-
-  return *MIB;
-}
-
-namespace {
-
-/// Interface to query instructions amenable to copy rewriting.
-class Rewriter {
-protected:
-  MachineInstr &CopyLike;
-  unsigned CurrentSrcIdx = 0; ///< The index of the source being rewritten.
-public:
-  Rewriter(MachineInstr &CopyLike) : CopyLike(CopyLike) {}
-  virtual ~Rewriter() = default;
-
-  /// Get the next rewritable source (SrcReg, SrcSubReg) and
-  /// the related value that it affects (DstReg, DstSubReg).
-  /// A source is considered rewritable if its register class and the
-  /// register class of the related DstReg may not be register
-  /// coalescer friendly. In other words, given a copy-like instruction
-  /// not all the arguments may be returned at rewritable source, since
-  /// some arguments are none to be register coalescer friendly.
-  ///
-  /// Each call of this method moves the current source to the next
-  /// rewritable source.
-  /// For instance, let CopyLike be the instruction to rewrite.
-  /// CopyLike has one definition and one source:
-  /// dst.dstSubIdx = CopyLike src.srcSubIdx.
-  ///
-  /// The first call will give the first rewritable source, i.e.,
-  /// the only source this instruction has:
-  /// (SrcReg, SrcSubReg) = (src, srcSubIdx).
-  /// This source defines the whole definition, i.e.,
-  /// (DstReg, DstSubReg) = (dst, dstSubIdx).
-  ///
-  /// The second and subsequent calls will return false, as there is only one
-  /// rewritable source.
-  ///
-  /// \return True if a rewritable source has been found, false otherwise.
-  /// The output arguments are valid if and only if true is returned.
-  virtual bool getNextRewritableSource(RegSubRegPair &Src,
-                                       RegSubRegPair &Dst) = 0;
-
-  /// Rewrite the current source with \p NewReg and \p NewSubReg if possible.
-  /// \return True if the rewriting was possible, false otherwise.
-  virtual bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) = 0;
-};
-
-/// Rewriter for COPY instructions.
-class CopyRewriter : public Rewriter {
-public:
-  CopyRewriter(MachineInstr &MI) : Rewriter(MI) {
-    assert(MI.isCopy() && "Expected copy instruction");
-  }
-  virtual ~CopyRewriter() = default;
-
-  bool getNextRewritableSource(RegSubRegPair &Src,
-                               RegSubRegPair &Dst) override {
-    // CurrentSrcIdx > 0 means this function has already been called.
-    if (CurrentSrcIdx > 0)
-      return false;
-    // This is the first call to getNextRewritableSource.
-    // Move the CurrentSrcIdx to remember that we made that call.
-    CurrentSrcIdx = 1;
-    // The rewritable source is the argument.
-    const MachineOperand &MOSrc = CopyLike.getOperand(1);
-    Src = RegSubRegPair(MOSrc.getReg(), MOSrc.getSubReg());
-    // What we track are the alternative sources of the definition.
-    const MachineOperand &MODef = CopyLike.getOperand(0);
-    Dst = RegSubRegPair(MODef.getReg(), MODef.getSubReg());
-    return true;
-  }
-
-  bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override {
-    if (CurrentSrcIdx != 1)
-      return false;
-    MachineOperand &MOSrc = CopyLike.getOperand(CurrentSrcIdx);
-    MOSrc.setReg(NewReg);
-    MOSrc.setSubReg(NewSubReg);
-    return true;
-  }
-};
-
-/// Helper class to rewrite uncoalescable copy like instructions
-/// into new COPY (coalescable friendly) instructions.
-class UncoalescableRewriter : public Rewriter {
-  unsigned NumDefs; ///< Number of defs in the bitcast.
-
-public:
-  UncoalescableRewriter(MachineInstr &MI) : Rewriter(MI) {
-    NumDefs = MI.getDesc().getNumDefs();
-  }
-
-  /// \see See Rewriter::getNextRewritableSource()
-  /// All such sources need to be considered rewritable in order to
-  /// rewrite a uncoalescable copy-like instruction. This method return
-  /// each definition that must be checked if rewritable.
-  bool getNextRewritableSource(RegSubRegPair &Src,
-                               RegSubRegPair &Dst) override {
-    // Find the next non-dead definition and continue from there.
-    if (CurrentSrcIdx == NumDefs)
-      return false;
-
-    while (CopyLike.getOperand(CurrentSrcIdx).isDead()) {
-      ++CurrentSrcIdx;
-      if (CurrentSrcIdx == NumDefs)
-        return false;
-    }
-
-    // What we track are the alternative sources of the definition.
-    Src = RegSubRegPair(0, 0);
-    const MachineOperand &MODef = CopyLike.getOperand(CurrentSrcIdx);
-    Dst = RegSubRegPair(MODef.getReg(), MODef.getSubReg());
-
-    CurrentSrcIdx++;
-    return true;
-  }
-
-  bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override {
-    return false;
-  }
-};
-
-/// Specialized rewriter for INSERT_SUBREG instruction.
-class InsertSubregRewriter : public Rewriter {
-public:
-  InsertSubregRewriter(MachineInstr &MI) : Rewriter(MI) {
-    assert(MI.isInsertSubreg() && "Invalid instruction");
-  }
-
-  /// \see See Rewriter::getNextRewritableSource()
-  /// Here CopyLike has the following form:
-  /// dst = INSERT_SUBREG Src1, Src2.src2SubIdx, subIdx.
-  /// Src1 has the same register class has dst, hence, there is
-  /// nothing to rewrite.
-  /// Src2.src2SubIdx, may not be register coalescer friendly.
-  /// Therefore, the first call to this method returns:
-  /// (SrcReg, SrcSubReg) = (Src2, src2SubIdx).
-  /// (DstReg, DstSubReg) = (dst, subIdx).
-  ///
-  /// Subsequence calls will return false.
-  bool getNextRewritableSource(RegSubRegPair &Src,
-                               RegSubRegPair &Dst) override {
-    // If we already get the only source we can rewrite, return false.
-    if (CurrentSrcIdx == 2)
-      return false;
-    // We are looking at v2 = INSERT_SUBREG v0, v1, sub0.
-    CurrentSrcIdx = 2;
-    const MachineOperand &MOInsertedReg = CopyLike.getOperand(2);
-    Src = RegSubRegPair(MOInsertedReg.getReg(), MOInsertedReg.getSubReg());
-    const MachineOperand &MODef = CopyLike.getOperand(0);
-
-    // We want to track something that is compatible with the
-    // partial definition.
-    if (MODef.getSubReg())
-      // Bail if we have to compose sub-register indices.
-      return false;
-    Dst = RegSubRegPair(MODef.getReg(),
-                        (unsigned)CopyLike.getOperand(3).getImm());
-    return true;
-  }
-
-  bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override {
-    if (CurrentSrcIdx != 2)
-      return false;
-    // We are rewriting the inserted reg.
-    MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx);
-    MO.setReg(NewReg);
-    MO.setSubReg(NewSubReg);
-    return true;
-  }
-};
-
-/// Specialized rewriter for EXTRACT_SUBREG instruction.
-class ExtractSubregRewriter : public Rewriter {
-  const TargetInstrInfo &TII;
-
-public:
-  ExtractSubregRewriter(MachineInstr &MI, const TargetInstrInfo &TII)
-      : Rewriter(MI), TII(TII) {
-    assert(MI.isExtractSubreg() && "Invalid instruction");
-  }
-
-  /// \see Rewriter::getNextRewritableSource()
-  /// Here CopyLike has the following form:
-  /// dst.dstSubIdx = EXTRACT_SUBREG Src, subIdx.
-  /// There is only one rewritable source: Src.subIdx,
-  /// which defines dst.dstSubIdx.
-  bool getNextRewritableSource(RegSubRegPair &Src,
-                               RegSubRegPair &Dst) override {
-    // If we already get the only source we can rewrite, return false.
-    if (CurrentSrcIdx == 1)
-      return false;
-    // We are looking at v1 = EXTRACT_SUBREG v0, sub0.
-    CurrentSrcIdx = 1;
-    const MachineOperand &MOExtractedReg = CopyLike.getOperand(1);
-    // If we have to compose sub-register indices, bail out.
-    if (MOExtractedReg.getSubReg())
-      return false;
-
-    Src =
-        RegSubRegPair(MOExtractedReg.getReg(), CopyLike.getOperand(2).getImm());
+/// retrieve all Def -> Use along the way up to the next source. Any found
+/// Use that is not itself a key for another entry, is the next source to
+/// use. During the search for the next source, multiple sources can be found
+/// given multiple incoming sources of a PHI instruction. In this case, we
+/// look in each PHI source for the next source; all found next sources must
+/// share the same register file as \p Reg and \p SubReg. The client should
+/// then be capable to rewrite all intermediate PHIs to get the next source.
+/// \return False if no alternative sources are available. True otherwise.
+bool PeepholeOptimizer::findNextSource(RegSubRegPair RegSubReg,
+                                       RewriteMapTy &RewriteMap) {
+  // Do not try to find a new source for a physical register.
+  // So far we do not have any motivating example for doing that.
+  // Thus, instead of maintaining untested code, we will revisit that if
+  // that changes at some point.
+  Register Reg = RegSubReg.Reg;
+  if (Reg.isPhysical())
+    return false;
+  const TargetRegisterClass *DefRC = MRI->getRegClass(Reg);
 
-    // We want to track something that is compatible with the definition.
-    const MachineOperand &MODef = CopyLike.getOperand(0);
-    Dst = RegSubRegPair(MODef.getReg(), MODef.getSubReg());
-    return true;
-  }
+  SmallVector<RegSubRegPair, 4> SrcToLook;
+  RegSubRegPair CurSrcPair = RegSubReg;
+  SrcToLook.push_back(CurSrcPair);
 
-  bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override {
-    // The only source we can rewrite is the input register.
-    if (CurrentSrcIdx != 1)
+  unsigned PHICount = 0;
+  do {
+    CurSrcPair = SrcToLook.pop_back_val();
+    // As explained above, do not handle physical registers
+    if (CurSrcPair.Reg.isPhysical())
       return false;
 
-    CopyLike.getOperand(CurrentSrcIdx).setReg(NewReg);
+    ValueTracker ValTracker(CurSrcPair.Reg, CurSrcPair.SubReg, *MRI, TII);
 
-    // If we find a source that does not require to extract something,
-    // rewrite the operation with a copy.
-    if (!NewSubReg) {
-      // Move the current index to an invalid position.
-      // We do not want another call to this method to be able
-      // to do any change.
-      CurrentSrcIdx = -1;
-      // Rewrite the operation as a COPY.
-      // Get rid of the sub-register index.
-      CopyLike.removeOperand(2);
-      // Morph the operation into a COPY.
-      CopyLike.setDesc(TII.get(TargetOpcode::COPY));
-      return true;
-    }
-    CopyLike.getOperand(CurrentSrcIdx + 1).setImm(NewSubReg);
-    return true;
-  }
-};
+    // Follow the chain of copies until we find a more suitable source, a phi
+    // or have to abort.
+    while (true) {
+      ValueTrackerResult Res = ValTracker.getNextSource();
+      // Abort at the end of a chain (without finding a suitable source).
+      if (!Res.isValid())
+        return false;
 
-/// Specialized rewriter for REG_SEQUENCE instruction.
-class RegSequenceRewriter : public Rewriter {
-public:
-  RegSequenceRewriter(MachineInstr &MI) : Rewriter(MI) {
-    assert(MI.isRegSequence() && "Invalid instruction");
-  }
+      // Insert the Def -> Use entry for the recently found source.
+      ValueTrackerResult CurSrcRes = RewriteMap.lookup(CurSrcPair);
+      if (CurSrcRes.isValid()) {
+        assert(CurSrcRes == Res && "ValueTrackerResult found must match");
+        // An existent entry with multiple sources is a PHI cycle we must avoid.
+        // Otherwise it's an entry with a valid next source we already found.
+        if (CurSrcRes.getNumSources() > 1) {
+          LLVM_DEBUG(dbgs()
+                     << "findNextSource: found PHI cycle, aborting...\n");
+          return false;
+        }
+        break;
+      }
+      RewriteMap.insert(std::make_pair(CurSrcPair, Res));
 
-  /// \see Rewriter::getNextRewritableSource()
-  /// Here CopyLike has the following form:
-  /// dst = REG_SEQUENCE Src1.src1SubIdx, subIdx1, Src2.src2SubIdx, subIdx2.
-  /// Each call will return a different source, walking all the available
-  /// source.
-  ///
-  /// The first call returns:
-  /// (SrcReg, SrcSubReg) = (Src1, src1SubIdx).
-  /// (DstReg, DstSubReg) = (dst, subIdx1).
-  ///
-  /// The second call returns:
-  /// (SrcReg, SrcSubReg) = (Src2, src2SubIdx).
-  /// (DstReg, DstSubReg) = (dst, subIdx2).
-  ///
-  /// And so on, until all the sources have been traversed, then
-  /// it returns false.
-  bool getNextRewritableSource(RegSubRegPair &Src,
-                               RegSubRegPair &Dst) override {
-    // We are looking at v0 = REG_SEQUENCE v1, sub1, v2, sub2, etc.
+      // ValueTrackerResult usually have one source unless it's the result from
+      // a PHI instruction. Add the found PHI edges to be looked up further.
+      unsigned NumSrcs = Res.getNumSources();
+      if (NumSrcs > 1) {
+        PHICount++;
+        if (PHICount >= RewritePHILimit) {
+          LLVM_DEBUG(dbgs() << "findNextSource: PHI limit reached\n");
+          return false;
+        }
 
-    // If this is the first call, move to the first argument.
-    if (CurrentSrcIdx == 0) {
-      CurrentSrcIdx = 1;
-    } else {
-      // Otherwise, move to the next argument and check that it is valid.
-      CurrentSrcIdx += 2;
-      if (CurrentSrcIdx >= CopyLike.getNumOperands())
+        for (unsigned i = 0; i < NumSrcs; ++i)
+          SrcToLook.push_back(Res.getSrc(i));
+        break;
+      }
+
+      CurSrcPair = Res.getSrc(0);
+      // Do not extend the live-ranges of physical registers as they add
+      // constraints to the register allocator. Moreover, if we want to extend
+      // the live-range of a physical register, unlike SSA virtual register,
+      // we will have to check that they aren't redefine before the related use.
+      if (CurSrcPair.Reg.isPhysical())
         return false;
-    }
-    const MachineOperand &MOInsertedReg = CopyLike.getOperand(CurrentSrcIdx);
-    Src.Reg = MOInsertedReg.getReg();
-    // If we have to compose sub-register indices, bail out.
-    if ((Src.SubReg = MOInsertedReg.getSubReg()))
-      return false;
 
-    // We want to track something that is compatible with the related
-    // partial definition.
-    Dst.SubReg = CopyLike.getOperand(CurrentSrcIdx + 1).getImm();
+      // Keep following the chain if the value isn't any better yet.
+      const TargetRegisterClass *SrcRC = MRI->getRegClass(CurSrcPair.Reg);
+      if (!TRI->shouldRewriteCopySrc(DefRC, RegSubReg.SubReg, SrcRC,
+                                     CurSrcPair.SubReg))
+        continue;
 
-    const MachineOperand &MODef = CopyLike.getOperand(0);
-    Dst.Reg = MODef.getReg();
-    // If we have to compose sub-registers, bail.
-    return MODef.getSubReg() == 0;
-  }
+      // We currently cannot deal with subreg operands on PHI instructions
+      // (see insertPHI()).
+      if (PHICount > 0 && CurSrcPair.SubReg != 0)
+        continue;
 
-  bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override {
-    // We cannot rewrite out of bound operands.
-    // Moreover, rewritable sources are at odd positions.
-    if ((CurrentSrcIdx & 1) != 1 || CurrentSrcIdx > CopyLike.getNumOperands())
-      return false;
+      // We found a suitable source, and are done with this chain.
+      break;
+    }
+  } while (!SrcToLook.empty());
 
-    MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx);
-    MO.setReg(NewReg);
-    MO.setSubReg(NewSubReg);
-    return true;
-  }
-};
+  // If we did not find a more suitable source, there is nothing to optimize.
+  return CurSrcPair.Reg != Reg;
+}
 
-} // end anonymous namespace
+/// Insert a PHI instruction with incoming edges \p SrcRegs that are
+/// guaranteed to have the same register class. This is necessary whenever we
+/// successfully traverse a PHI instruction and find suitable sources coming
+/// from its edges. By inserting a new PHI, we provide a rewritten PHI def
+/// suitable to be used in a new COPY instruction.
+static MachineInstr &insertPHI(MachineRegisterInfo &MRI,
+                               const TargetInstrInfo &TII,
+                               const SmallVectorImpl<RegSubRegPair> &SrcRegs,
+                               MachineInstr &OrigPHI) {
+  assert(!SrcRegs.empty() && "No sources to create a PHI instruction?");
 
-/// Get the appropriated Rewriter for \p MI.
-/// \return A pointer to a dynamically allocated Rewriter or nullptr if no
-/// rewriter works for \p MI.
-static Rewriter *getCopyRewriter(MachineInstr &MI, const TargetInstrInfo &TII) {
-  // Handle uncoalescable copy-like instructions.
-  if (MI.isBitcast() || MI.isRegSequenceLike() || MI.isInsertSubregLike() ||
-      MI.isExtractSubregLike())
-    return new UncoalescableRewriter(MI);
+  const TargetRegisterClass *NewRC = MRI.getRegClass(SrcRegs[0].Reg);
+  // NewRC is only correct if no subregisters are involved. findNextSource()
+  // should have rejected those cases already.
+  assert(SrcRegs[0].SubReg == 0 && "should not have subreg operand");
+  Register NewVR = MRI.createVirtualRegister(NewRC);
+  MachineBasicBlock *MBB = OrigPHI.getParent();
+  MachineInstrBuilder MIB = BuildMI(*MBB, &OrigPHI, OrigPHI.getDebugLoc(),
+                                    TII.get(TargetOpcode::PHI), NewVR);
 
-  switch (MI.getOpcode()) {
-  default:
-    return nullptr;
-  case TargetOpcode::COPY:
-    return new CopyRewriter(MI);
-  case TargetOpcode::INSERT_SUBREG:
-    return new InsertSubregRewriter(MI);
-  case TargetOpcode::EXTRACT_SUBREG:
-    return new ExtractSubregRewriter(MI, TII);
-  case TargetOpcode::REG_SEQUENCE:
-    return new RegSequenceRewriter(MI);
+  unsigned MBBOpIdx = 2;
+  for (const RegSubRegPair &RegPair : SrcRegs) {
+    MIB.addReg(RegPair.Reg, 0, RegPair.SubReg);
+    MIB.addMBB(OrigPHI.getOperand(MBBOpIdx).getMBB());
+    // Since we're extended the lifetime of RegPair.Reg, clear the
+    // kill flags to account for that and make RegPair.Reg reaches
+    // the new PHI.
+    MRI.clearKillFlags(RegPair.Reg);
+    MBBOpIdx += 2;
   }
+
+  return *MIB;
 }
 
 /// Given a \p Def.Reg and Def.SubReg  pair, use \p RewriteMap to find
@@ -1212,36 +1187,13 @@ getNewSource(MachineRegisterInfo *MRI, const TargetInstrInfo *TII,
   return RegSubRegPair(0, 0);
 }
 
-/// Optimize generic copy instructions to avoid cross register bank copy.
-/// The optimization looks through a chain of copies and tries to find a source
-/// that has a compatible register class.
-/// Two register classes are considered to be compatible if they share the same
-/// register bank.
-/// New copies issued by this optimization are register allocator
-/// friendly. This optimization does not remove any copy as it may
-/// overconstrain the register allocator, but replaces some operands
-/// when possible.
-/// \pre isCoalescableCopy(*MI) is true.
-/// \return True, when \p MI has been rewritten. False otherwise.
-bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr &MI) {
-  assert(isCoalescableCopy(MI) && "Invalid argument");
-  assert(MI.getDesc().getNumDefs() == 1 &&
-         "Coalescer can understand multiple defs?!");
-  const MachineOperand &MODef = MI.getOperand(0);
-  // Do not rewrite physical definitions.
-  if (MODef.getReg().isPhysical())
-    return false;
-
+bool PeepholeOptimizer::optimizeCoalescableCopyImpl(Rewriter &&CpyRewriter) {
   bool Changed = false;
   // Get the right rewriter for the current copy.
-  std::unique_ptr<Rewriter> CpyRewriter(getCopyRewriter(MI, *TII));
-  // If none exists, bail out.
-  if (!CpyRewriter)
-    return false;
   // Rewrite each rewritable source.
   RegSubRegPair Src;
   RegSubRegPair TrackPair;
-  while (CpyRewriter->getNextRewritableSource(Src, TrackPair)) {
+  while (CpyRewriter.getNextRewritableSource(Src, TrackPair)) {
     // Keep track of PHI nodes and its incoming edges when looking for sources.
     RewriteMapTy RewriteMap;
     // Try to find a more suitable source. If we failed to do so, or get the
@@ -1257,12 +1209,13 @@ bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr &MI) {
       continue;
 
     // Rewrite source.
-    if (CpyRewriter->RewriteCurrentSource(NewSrc.Reg, NewSrc.SubReg)) {
+    if (CpyRewriter.RewriteCurrentSource(NewSrc.Reg, NewSrc.SubReg)) {
       // We may have extended the live-range of NewSrc, account for that.
       MRI->clearKillFlags(NewSrc.Reg);
       Changed = true;
     }
   }
+
   // TODO: We could have a clean-up method to tidy the instruction.
   // E.g., v0 = INSERT_SUBREG v1, v1.sub0, sub0
   // => v0 = COPY v1
@@ -1272,6 +1225,44 @@ bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr &MI) {
   return Changed;
 }
 
+/// Optimize generic copy instructions to avoid cross register bank copy.
+/// The optimization looks through a chain of copies and tries to find a source
+/// that has a compatible register class.
+/// Two register classes are considered to be compatible if they share the same
+/// register bank.
+/// New copies issued by this optimization are register allocator
+/// friendly. This optimization does not remove any copy as it may
+/// overconstrain the register allocator, but replaces some operands
+/// when possible.
+/// \pre isCoalescableCopy(*MI) is true.
+/// \return True, when \p MI has been rewritten. False otherwise.
+bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr &MI) {
+  assert(isCoalescableCopy(MI) && "Invalid argument");
+  assert(MI.getDesc().getNumDefs() == 1 &&
+         "Coalescer can understand multiple defs?!");
+  const MachineOperand &MODef = MI.getOperand(0);
+  // Do not rewrite physical definitions.
+  if (MODef.getReg().isPhysical())
+    return false;
+
+  switch (MI.getOpcode()) {
+  case TargetOpcode::COPY:
+    return optimizeCoalescableCopyImpl(CopyRewriter(MI));
+  case TargetOpcode::INSERT_SUBREG:
+    return optimizeCoalescableCopyImpl(InsertSubregRewriter(MI));
+  case TargetOpcode::EXTRACT_SUBREG:
+    return optimizeCoalescableCopyImpl(ExtractSubregRewriter(MI, *TII));
+  case TargetOpcode::REG_SEQUENCE:
+    return optimizeCoalescableCopyImpl(RegSequenceRewriter(MI));
+  default:
+    // Handle uncoalescable copy-like instructions.
+    if (MI.isBitcast() || MI.isRegSequenceLike() || MI.isInsertSubregLike() ||
+        MI.isExtractSubregLike())
+      return optimizeCoalescableCopyImpl(UncoalescableRewriter(MI));
+    return false;
+  }
+}
+
 /// Rewrite the source found through \p Def, by using the \p RewriteMap
 /// and create a new COPY instruction. More info about RewriteMap in
 /// PeepholeOptimizer::findNextSource. Right now this is only used to handle



More information about the llvm-commits mailing list