[llvm] [CodeGen] Introduce MIR-level target-independent rematerialization helper (PR #177080)
Lucas Ramirez via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 19 05:22:50 PST 2026
https://github.com/lucas-rami updated https://github.com/llvm/llvm-project/pull/177080
>From 22c9187270a1a057b82f77d9b000f88a5b44ee71 Mon Sep 17 00:00:00 2001
From: Lucas Ramirez <lucas.rami at proton.me>
Date: Wed, 21 Jan 2026 01:21:19 +0000
Subject: [PATCH 1/6] [CodeGen] Introduce target-independent rematerializer
---
llvm/include/llvm/CodeGen/Rematerializer.h | 431 ++++++++++
llvm/lib/CodeGen/CMakeLists.txt | 1 +
llvm/lib/CodeGen/Rematerializer.cpp | 746 ++++++++++++++++++
llvm/unittests/CodeGen/CMakeLists.txt | 1 +
llvm/unittests/CodeGen/RematerializerTest.cpp | 450 +++++++++++
5 files changed, 1629 insertions(+)
create mode 100644 llvm/include/llvm/CodeGen/Rematerializer.h
create mode 100644 llvm/lib/CodeGen/Rematerializer.cpp
create mode 100644 llvm/unittests/CodeGen/RematerializerTest.cpp
diff --git a/llvm/include/llvm/CodeGen/Rematerializer.h b/llvm/include/llvm/CodeGen/Rematerializer.h
new file mode 100644
index 0000000000000..ff075a51e38f0
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/Rematerializer.h
@@ -0,0 +1,431 @@
+//=====-- Rematerializer.h - MIR rematerialization support ------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//==-----------------------------------------------------------------------===//
+//
+/// \file
+/// MIR-level target-independent rematerialization helpers.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include <iterator>
+
+namespace llvm {
+
+/// MIR-level target-independent rematerializer. Provides an API to identify and
+/// rematerialize registers within a machine function.
+///
+/// At the moment this supports rematerializing registers that meet all of the
+/// following constraints.
+/// 1. The register is virtual and has a single defining instruction.
+/// 2. The single defining instruction is deemed rematerializable by the TII and
+/// has no non-constant and non-ignorable physical register use.
+/// 3. The register has at least one non-debug use that is inside or at the
+/// boundary of a region.
+///
+/// Rematerializable registers (represented by \ref Rematerializer::Reg) form a
+/// DAG of their own, with every register having incoming edges from all
+/// rematerializable registers which are read by the instruction defining it.
+/// Ignoring outgoing edges, each register can be seen as the root of its own
+/// tree within this DAG. The API uses dense unsigned integers starting at 0 to
+/// reference rematerializable registers. These indices are immutable i.e., even
+/// when registers are deleted their respective integer handles remain valid.
+/// Methods which perform actual rematerializations should however be assumed to
+/// invalidate addresses to \ref Rematerializer::Reg objects.
+///
+/// The rematerializer supports rematerializing arbitrarily complex trees of
+/// registers to regions where these registers are used, with the option of
+/// re-using non-root registers or their previous rematerializations instead of
+/// rematerializing them again. It also optionally supports rolling back
+/// previous rematerializations to restore the MIR state to what it was
+/// pre-rematerialization. When enabled, machine instructions defining
+/// rematerializable registers that no longer have any uses following previous
+/// rematerializations will not be deleted from the MIR; their opcode will
+/// instead be set to a debug value and their read register operands set to the
+/// null register. This maintains their position in the MIR and keeps the
+/// original register alive for potential rollback while allowing other
+/// passes/analyses (e.g., machine scheduler, live-interval analysis) to ignore
+/// them. \ref Rematerializer::commitRematerializations actually deletes those
+/// instructions when rollback is deemed unnecessary.
+///
+/// Throughout its lifetime, the rematerializer tracks new registers it creates
+/// (which are rematerializable by construction) and their relations to other
+/// registers. It performs DAG updates immediately on rematerialization but
+/// defers/batches all necessary live interval updates to reduce the number of
+/// expensive LIS queries when successively rematerializing many registers. \ref
+/// Rematerializer::updateLiveIntervals performs all currently batched live
+/// interval updates.
+///
+/// In its nomenclature, the rematerializer differentiates between "original
+/// registers" (registers that were present when it analyzed the function) and
+/// rematerializations of these original registers. Rematerializations have a
+/// "parent" which is the original register they were rematerialized from
+/// (transitivity applies; a rematerialization and all of its own
+/// rematerializations have the same parent). Semantically, only original
+/// registers have rematerializations.
+class Rematerializer {
+public:
+ /// A rematerializable register defined by a single machine instruction.
+ ///
+ /// A rematerializable register has a set of dependencies, which correspond
+ /// to the unique read register operands of its defining instruction.
+ /// They are identified by their machine operand index, and can themselves be
+ /// rematerializable. Operand indices corresponding to unrematerializable
+ /// dependencies are managed by and queried from the rematerializer.
+ ///
+ /// A rematerializable register also has an arbitrary number of users in an
+ /// arbitrary number of regions, potentially including its own defining
+ /// region. When user transfers make a register lose all its users, the
+ /// rematerializer marks it for deletion, in which case its defining
+ /// instruction either becomes nullptr (without rollback support) or its
+ /// opcode is set to TargetOpcode::DBG_VALUE (with rollback support) until
+ /// \ref Rematerializer::commitRematerializations is called.
+ struct Reg {
+ /// Single MI defining the rematerializable register.
+ MachineInstr *DefMI;
+ /// Defining region of \p DefMI.
+ unsigned DefRegion;
+ /// The rematerializable register's lane bitmask.
+ LaneBitmask Mask;
+
+ using RegionUsers = SmallDenseSet<MachineInstr *, 4>;
+ /// Uses of the register, mapped by region.
+ SmallDenseMap<unsigned, RegionUsers, 2> Uses;
+
+ /// A read register operand of \p DefMI that is rematerializable (according
+ /// to the rematerializer).
+ struct Dependency {
+ /// The register's machine operand index in \p DefMI.
+ unsigned MOIdx;
+ /// The corresponding register's index in the rematerializer.
+ unsigned RegIdx;
+
+ Dependency(unsigned MOIdx, unsigned RegIdx)
+ : MOIdx(MOIdx), RegIdx(RegIdx) {}
+ };
+ /// This register's rematerializable dependencies, one per unique
+ /// rematerializable register operand.
+ SmallVector<Dependency, 2> Dependencies;
+
+ /// Returns the rematerializable register from its defining instruction.
+ inline Register getDefReg() const {
+ assert(DefMI && "defining instruction was deleted");
+ return DefMI->getOperand(0).getReg();
+ }
+
+    /// Returns whether \ref Uses has an entry for the register's defining
+    /// region.
+    bool hasUsersInDefRegion() const {
+      if (Uses.empty())
+        return false;
+      return Uses.contains(DefRegion);
+    }
+
+    /// Returns whether \ref Uses has an entry for at least one region other
+    /// than the register's defining region.
+    bool hasUsersOutsideDefRegion() const {
+      const unsigned NumUseRegions = Uses.size();
+      if (NumUseRegions == 0)
+        return false;
+      if (NumUseRegions > 1)
+        return true;
+      // Exactly one region has users; it must not be the defining one.
+      return Uses.begin()->first != DefRegion;
+    }
+
+ /// Returns the first and last user of the register in region \p UseRegion.
+ /// If the register has no user in the region, returns a pair of nullptr's.
+ std::pair<MachineInstr *, MachineInstr *>
+ getRegionUseBounds(unsigned UseRegion, const LiveIntervals &LIS) const;
+
+ private:
+ void addUser(MachineInstr *MI, unsigned Region);
+ void addUsers(const RegionUsers &NewUsers, unsigned Region);
+ void eraseUser(MachineInstr *MI, unsigned Region);
+
+ friend Rematerializer;
+ };
+
+ /// Error value for register indices.
+ static constexpr unsigned NoReg = ~0;
+
+ /// A region's boundaries i.e. a pair of instruction bundle iterators. The
+ /// lower boundary is inclusive, the upper boundary is exclusive.
+ using RegionBoundaries =
+ std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>;
+
+ /// Simply initializes some internal state, does not identify
+ /// rematerialization candidates.
+ Rematerializer(MachineFunction &MF,
+ SmallVectorImpl<RegionBoundaries> &Regions,
+ bool RegionsTopDown, LiveIntervals &LIS)
+ : MF(MF), Regions(Regions), MRI(MF.getRegInfo()), LIS(LIS),
+ TII(*MF.getSubtarget().getInstrInfo()), TRI(TII.getRegisterInfo()),
+ RegionsTopDown(RegionsTopDown) {}
+
+ /// Goes through the whole MF and identifies all rematerializable registers.
+ /// Returns whether there is any rematerializable register in the MF.
+ bool analyze();
+
+  /// Returns the rematerializable register with handle \p RegIdx, which must
+  /// be a valid index into the register vector.
+  inline const Reg &getReg(unsigned RegIdx) const {
+    assert(RegIdx < Regs.size() && "out of bounds");
+    return Regs[RegIdx];
+  }
+  /// Returns all registers known to the rematerializer, indexed by handle.
+  inline ArrayRef<Reg> getRegs() const { return Regs; }
+  /// Returns the number of registers known to the rematerializer.
+  inline unsigned getNumRegs() const { return Regs.size(); }
+
+  /// Returns the boundaries of region \p RegionIdx, which must be a valid
+  /// region index. Const-qualified since it only reads state, consistently
+  /// with \ref getNumRegions.
+  inline const RegionBoundaries &getRegion(unsigned RegionIdx) const {
+    assert(RegionIdx < Regions.size() && "out of bounds");
+    return Regions[RegionIdx];
+  }
+ inline unsigned getNumRegions() const { return Regions.size(); }
+
+ inline bool isRematerialization(unsigned RegIdx) const {
+ assert(RegIdx < Regs.size() && "out of bounds");
+ return RegIdx >= UnrematableOprds.size();
+ }
+ /// Returns the parent index of rematerialization \p RematRegIdx.
+ inline unsigned getParentOf(unsigned RematRegIdx) const {
+ assert(isRematerialization(RematRegIdx) && "not a rematerialization");
+ return Parents[RematRegIdx - UnrematableOprds.size()];
+ }
+  /// Maps \p RegIdx to the index of the original register it stands for: the
+  /// parent's index when \p RegIdx is a rematerialization, \p RegIdx itself
+  /// when it already is an original register.
+  inline unsigned getParentOrSelf(unsigned RegIdx) const {
+    return isRematerialization(RegIdx) ? getParentOf(RegIdx) : RegIdx;
+  }
+ /// Returns operand indices corresponding to unrematerializable operands for
+ /// any register \p RegIdx.
+ inline ArrayRef<unsigned> getUnrematableOprds(unsigned RegIdx) const {
+ return UnrematableOprds[getParentOrSelf(RegIdx)];
+ }
+
+ /// When rematerializing a register (called the "root register" in this
+ /// context) to a given position, we must decide what to do with all its
+ /// dependencies; for each dependency we can either
+ /// 1. rematerialize it along with the register,
+ /// 2. re-use it as-is, or
+ /// 3. re-use a pre-existing rematerialization of it.
+ /// In case (1), the same decision needs to be made for all of the
+ /// dependency's dependencies (i.e., the root's transitive dependencies). In
+ /// cases (2) and (3), transitive dependencies need not be examined.
+ ///
+ /// This struct allows to encode decisions of types (2) and (3) when
+ /// rematerialization of all of the root's transitive dependencies is
+ /// undesirable. During rematerialization, all of the root's transitive
+ /// dependencies which are not marked as re-used in some way will be
+ /// rematerialized along the root.
+  struct DependencyReuseInfo {
+    /// For every register the root transitively depends on and that must not
+    /// be rematerialized again, the index of the register to use in its place
+    /// when rematerializing the root.
+    SmallDenseMap<unsigned, unsigned, 4> DependencyMap;
+
+    /// Requests that dependency \p DepIdx be re-used as-is. An existing
+    /// mapping for \p DepIdx is left untouched.
+    DependencyReuseInfo &reuse(unsigned DepIdx) {
+      return useRemat(DepIdx, DepIdx);
+    }
+    /// Requests that \p DepRematIdx be used in place of dependency \p DepIdx.
+    /// An existing mapping for \p DepIdx is left untouched.
+    DependencyReuseInfo &useRemat(unsigned DepIdx, unsigned DepRematIdx) {
+      DependencyMap.try_emplace(DepIdx, DepRematIdx);
+      return *this;
+    }
+    /// Drops all recorded mappings.
+    DependencyReuseInfo &clear() {
+      DependencyMap.clear();
+      return *this;
+    }
+  };
+
+ /// Rematerializes a register tree rooted at register \p RootIdx to a region
+ /// \p UseRegion where it has at least one user, transfers all its users in
+ /// the region to the new register, and returns the latter's index. Transitive
+ /// dependencies of the root are rematerialized or re-used according to \p
+ /// DRI. If \p SupportRollback is true, rematerializations of registers that
+ /// lose all their users as a consequence of the rematerializations can later
+ /// be rolled back.
+ ///
+ /// When the method returns, \p DRI contains additional mappings of all
+ /// transitive dependencies that had to be rematerialized to their
+ /// rematerialization's respective index. References to \ref
+ /// Rematerializer::Reg should be considered invalidated by calls to this
+ /// method.
+ unsigned rematerializeToRegion(unsigned RootIdx, unsigned UseRegion,
+ bool SupportRollback,
+ DependencyReuseInfo &DRI);
+
+ /// Rematerializes a register tree rooted at register \p RootIdx to position
+ /// \p InsertPos and returns the new register's index. Transitive dependencies
+ /// of the root are rematerialized or re-used according to \p DRI.
+ ///
+ /// When the method returns, \p DRI contains additional mappings of all
+ /// transitive dependencies that had to be rematerialized to their respective
+ /// rematerialization's index. References to \ref Rematerializer::Reg should
+ /// be considered invalidated by calls to this method.
+ unsigned rematerializeToPos(unsigned RootIdx,
+ MachineBasicBlock::iterator InsertPos,
+ DependencyReuseInfo &DRI);
+
+ /// Rolls back all rematerializations of original register \p RootIdx,
+ /// transferring all their users back to it and permanently deleting them from
+ /// the MIR. The root register is revived if it was fully rematerialized (this
+ /// requires that rollback support was set at that time). Transitive
+ /// dependencies of the root register that were fully rematerialized are
+ /// revived at their original positions; this requires that rollback support
+ /// was set when they were rematerialized.
+ void rollbackRematsOf(unsigned RootIdx);
+
+ /// Rolls back register \p RematIdx (which must be a rematerialization)
+ /// transferring all its users back to its parent. The latter is revived if it
+ /// was fully rematerialized (this requires that rollback support was set at
+ /// that time).
+ void rollback(unsigned RematIdx);
+
+ /// Revives original register \p RootIdx at its original position in the MIR
+ /// if it was fully rematerialized with rollback support set. Transitive
+ /// dependencies of the root register that were fully rematerialized are
+ /// revived at their original positions; this requires that rollback support
+ /// was set when they were themselves rematerialized.
+ void reviveRegIfDead(unsigned RootIdx);
+
+ /// Transfers all users of register \p FromRegIdx in region \p UseRegion to \p
+ /// ToRegIdx, the latter of which must be a rematerialization of the former or
+ /// have the same parent register. Users in \p UseRegion must be reachable
+ /// from \p ToRegIdx. If \p SupportRollback is true, rematerializations of
+ /// registers that lose all their users as a consequence of the transfer can
+ /// later be rolled back.
+ void transferRegionUsers(unsigned FromRegIdx, unsigned ToRegIdx,
+ unsigned UseRegion, bool SupportRollback);
+
+ /// Transfers user \p UserMI from register \p FromRegIdx to \p ToRegIdx,
+ /// the latter of which must be a rematerialization of the former or have the
+ /// same parent register. \p UserMI must be a direct user of \p FromRegIdx. \p
+ /// UserMI must be reachable from \p ToRegIdx. If \p SupportRollback is true,
+ /// rematerializations of registers that lose all their users as a consequence
+ /// of the transfer can later be rolled back.
+ void transferUser(unsigned FromRegIdx, unsigned ToRegIdx,
+ MachineInstr &UserMI, bool SupportRollback);
+
+ /// Recomputes all live intervals that have changed as a result of previous
+ /// rematerializations/rollbacks.
+ void updateLiveIntervals();
+
+ /// Deletes unused rematerialized registers that were left in the MIR to
+ /// support rollback.
+ void commitRematerializations();
+
+ /// Determines whether register operand \p MO is available at all \p Uses
+ /// according to its current live interval.
+ bool isMOAvailableAtUses(const MachineOperand &MO,
+ ArrayRef<SlotIndex> Uses) const;
+
+ /// Finds the closest rematerialization of register \p RegIdx in region \p
+ /// Region that exists before slot \p Before. If no such rematerialization
+ /// exists, returns \ref Rematerializer::NoReg.
+ unsigned findRematInRegion(unsigned RegIdx, unsigned Region,
+ SlotIndex Before) const;
+
+ Printable printTree(unsigned RootIdx) const;
+ Printable printID(unsigned RegIdx) const;
+ Printable printRematReg(unsigned RegIdx, bool SkipRegions = false) const;
+ Printable printRegUsers(unsigned RegIdx) const;
+ Printable printUser(const MachineInstr *MI) const;
+
+private:
+ MachineFunction &MF;
+ SmallVectorImpl<RegionBoundaries> &Regions;
+ MachineRegisterInfo &MRI;
+ LiveIntervals &LIS;
+ const TargetInstrInfo &TII;
+ const TargetRegisterInfo &TRI;
+ bool RegionsTopDown;
+
+ /// Rematerializable registers identified since the rematerializer's creation,
+ /// both dead and alive, originals and rematerializations. No register is ever
+ /// deleted. Indices inside this vector serve as handles for rematerializable
+ /// registers.
+ SmallVector<Reg> Regs;
+ /// For each original register, stores indices of unrematerializable read
+ /// register operands. This doesn't change after the initial collection
+ /// period, so the size of the vector indicates the number of original
+ /// registers.
+ SmallVector<SmallVector<unsigned, 2>> UnrematableOprds;
+ /// Indicates the original register index of each rematerialization, in the
+ /// order in which they are created. The size of the vector indicates the
+ /// total number of rematerializations ever created, including those that were
+ /// deleted or rolled back.
+ SmallVector<unsigned> Parents;
+ /// Maps original register indices to their currently alive
+ /// rematerializations. In practice most registers don't have
+ /// rematerializations so this is represented as a map to lower memory cost.
+ DenseMap<unsigned, SmallDenseSet<unsigned, 4>> Rematerializations;
+
+ /// Registers mapped to the index of their corresponding rematerialization
+ /// data in the \ref Regs vector. This includes registers that no longer exist
+ /// in the MIR.
+ DenseMap<Register, unsigned> RegToIdx;
+ /// Maps all MIs (except lone terminators, which are not part of any region)
+ /// to their parent region. Non-lone terminators are considered part of the
+ /// region they delimit.
+ DenseMap<MachineInstr *, unsigned> MIRegion;
+ /// Set of registers whose live-range may have changed during past
+ /// rematerializations/rollbacks.
+ DenseSet<unsigned> LISUpdates;
+ /// Keys are fully rematerialized registers whose rematerializations are
+ /// currently rollback-able. Values map register machine operand indices to
+ /// their original register.
+ DenseMap<unsigned, DenseMap<unsigned, Register>> Rollbackable;
+
+ /// Collects all rematerializable registers inside region \p DefRegion.
+ void collectRegs(unsigned DefRegion);
+
+ /// Determines whether \p MI is considered rematerializable. This further
+ /// restricts constraints imposed by the TII on rematerializable instructions,
+ /// requiring for example that the defined register is virtual and only
+ /// defined once.
+ bool isMIRematerializable(const MachineInstr &MI) const;
+
+ /// Rematerializes register \p RegIdx at \p InsertPos, adding the new
+ /// rematerializable register to the backing vector \ref Regs and returning
+ /// its index inside the vector. Sets the new registers' rematerializable
+ /// dependencies to \p Dependencies and its unrematerializable dependencies to
+ /// the same as \p RegIdx. The new register initially has no user, it is
+ /// assumed that the caller will give it at least one after its creation.
+ /// Since the method appends to \ref Regs, references to elements within it
+ /// should be considered invalidated across calls to this method unless the
+ /// vector can be guaranteed to have enough space for an extra element.
+ unsigned createReg(unsigned RegIdx, MachineBasicBlock::iterator InsertPos,
+ SmallVectorImpl<Reg::Dependency> &&Dependencies);
+
+ /// Internal version of \ref Rematerializer::transferUser that doesn't update
+ /// register users.
+ void transferUserInternal(unsigned FromRegIdx, unsigned ToRegIdx,
+ MachineInstr &UserMI);
+
+ /// Deletes register \p RootIdx if it no longer has any user. If the register
+ /// is deleted, recursively deletes any of its transitive rematerializable
+ /// dependencies that no longer have users as a result. When \p
+ /// SupportRollback is true, allows to rollback rematerializations of the
+ /// deleted register later on.
+ bool deleteRegIfUnused(unsigned RootIdx, bool SupportRollback);
+
+ /// Deletes rematerializable register \p RegIdx from the DAG and relevant
+ /// internal state.
+ void deleteReg(unsigned RegIdx);
+
+ /// If \p MI's first operand defines a register and that register is a
+ /// rematerializable register tracked by the rematerializer, returns its
+ /// index in the \ref Regs vector. Otherwise returns \ref
+ /// Rematerializer::NoReg.
+ unsigned getDefRegIdx(const MachineInstr &MI) const;
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ unsigned CallDepth = 0;
+ raw_ostream &rdbgs() const {
+ for (unsigned I = 0; I < CallDepth; ++I)
+ dbgs() << " ";
+ return dbgs();
+ }
+#endif
+};
+
+} // namespace llvm
diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt
index f26b2cb6fddf5..69a85533cbc18 100644
--- a/llvm/lib/CodeGen/CMakeLists.txt
+++ b/llvm/lib/CodeGen/CMakeLists.txt
@@ -197,6 +197,7 @@ add_llvm_component_library(LLVMCodeGen
RegisterPressure.cpp
RegisterScavenging.cpp
GCEmptyBasicBlocks.cpp
+ Rematerializer.cpp
RemoveRedundantDebugValues.cpp
RenameIndependentSubregs.cpp
MachineStableHash.cpp
diff --git a/llvm/lib/CodeGen/Rematerializer.cpp b/llvm/lib/CodeGen/Rematerializer.cpp
new file mode 100644
index 0000000000000..b7fbfbf9f7101
--- /dev/null
+++ b/llvm/lib/CodeGen/Rematerializer.cpp
@@ -0,0 +1,746 @@
+//=====-- Rematerializer.cpp - MIR rematerialization support ----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//==-----------------------------------------------------------------------===//
+//
+/// \file
+/// Implements helpers for target-independent rematerialization at the MIR
+/// level.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Rematerializer.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Register.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "rematerializer"
+
+using namespace llvm;
+
+/// Returns whether value \p OVNI of \p LI is the value live at \p UseIdx. When
+/// \p LI has subranges, subranges covering lanes of \p Mask are additionally
+/// required to be live at \p UseIdx, until all lanes of \p Mask are covered.
+static bool isAvailableAtUse(const VNInfo *OVNI, LaneBitmask Mask,
+                             SlotIndex UseIdx, const LiveInterval &LI) {
+  assert(OVNI);
+  // The value reaching the use must be the one we started from.
+  if (LI.getVNInfoAt(UseIdx) != OVNI)
+    return false;
+  if (!LI.hasSubRanges())
+    return true;
+
+  // Check that subrange is live at user.
+  LaneBitmask RemainingLanes = Mask;
+  for (const LiveInterval::SubRange &SR : LI.subranges()) {
+    if ((SR.LaneMask & RemainingLanes).none())
+      continue;
+    if (!SR.liveAt(UseIdx))
+      return false;
+
+    // Early exit if all used lanes are checked. No need to continue.
+    RemainingLanes &= ~SR.LaneMask;
+    if (RemainingLanes.none())
+      break;
+  }
+  return true;
+}
+
+/// Returns the register read by \p MO if it is a non-physical register.
+/// Returns the null register if \p MO is not a register operand, does not read
+/// its register, or refers to a physical register.
+static Register isRegDependency(const MachineOperand &MO) {
+  if (MO.isReg() && MO.readsReg()) {
+    // By the requirements on trivially rematerializable instructions, a
+    // physical register use is either constant or ignorable.
+    if (Register ReadReg = MO.getReg(); !ReadReg.isPhysical())
+      return ReadReg;
+  }
+  return Register();
+}
+
+unsigned Rematerializer::rematerializeToRegion(unsigned RootIdx,
+                                               unsigned UseRegion,
+                                               bool SupportRollback,
+                                               DependencyReuseInfo &DRI) {
+  // The root's first user in the region serves as insertion position. The
+  // bounds are a pair of nullptr's when the root has no user in the region,
+  // which would yield an invalid insertion iterator; callers must only target
+  // regions in which the root is used.
+  MachineInstr *FirstMI =
+      getReg(RootIdx).getRegionUseBounds(UseRegion, LIS).first;
+  assert(FirstMI && "root register has no user in region");
+  unsigned NewRegIdx = rematerializeToPos(RootIdx, FirstMI, DRI);
+  // Redirect the region's users to the rematerialization; this may leave the
+  // root without users.
+  transferRegionUsers(RootIdx, NewRegIdx, UseRegion, SupportRollback);
+  return NewRegIdx;
+}
+
+unsigned
+Rematerializer::rematerializeToPos(unsigned RootIdx,
+                                   MachineBasicBlock::iterator InsertPos,
+                                   DependencyReuseInfo &DRI) {
+  LLVM_DEBUG({
+    rdbgs() << "Rematerializing " << printID(RootIdx) << " to "
+            << printUser(&*InsertPos) << '\n';
+    ++CallDepth;
+  });
+
+  // Work on a snapshot of the root's dependencies: recursive calls append to
+  // the backing vector of registers, which may invalidate references into it.
+  const SmallVector<Reg::Dependency, 2> RootDeps(Regs[RootIdx].Dependencies);
+  SmallVector<Reg::Dependency, 2> RematDeps;
+  for (const Reg::Dependency &RootDep : RootDeps) {
+    unsigned UseDepIdx;
+    auto Known = DRI.DependencyMap.find(RootDep.RegIdx);
+    if (Known != DRI.DependencyMap.end()) {
+      // We already have the version of the dependency we want to use.
+      UseDepIdx = Known->second;
+    } else {
+      // Dependencies must be rematerialized in def-use order. Record the
+      // result so that it is found in the map from now on.
+      UseDepIdx = rematerializeToPos(RootDep.RegIdx, InsertPos, DRI);
+      DRI.DependencyMap.insert({RootDep.RegIdx, UseDepIdx});
+    }
+    RematDeps.emplace_back(RootDep.MOIdx, UseDepIdx);
+  }
+
+  LLVM_DEBUG(--CallDepth);
+  return createReg(RootIdx, InsertPos, std::move(RematDeps));
+}
+
+void Rematerializer::rollbackRematsOf(unsigned RootIdx) {
+  auto Remats = Rematerializations.find(RootIdx);
+  if (Remats == Rematerializations.end())
+    return;
+
+  LLVM_DEBUG({
+    rdbgs() << "Rolling back rematerializations of " << printID(RootIdx)
+            << '\n';
+    ++CallDepth;
+  });
+
+  reviveRegIfDead(RootIdx);
+
+  // Transferring users erases entries from the source register's use map and
+  // may delete the source register once it has no user left. Iterate over
+  // snapshots so that no container is modified while it is being traversed.
+  const SmallVector<unsigned, 4> RematIdxs(Remats->getSecond().begin(),
+                                           Remats->getSecond().end());
+  // All of the rematerialization's users must use the revived register.
+  for (unsigned RematRegIdx : RematIdxs) {
+    SmallVector<unsigned, 4> UseRegions;
+    for (const auto &RegionAndUsers : Regs[RematRegIdx].Uses)
+      UseRegions.push_back(RegionAndUsers.first);
+    for (unsigned UseRegion : UseRegions) {
+      transferRegionUsers(RematRegIdx, RootIdx, UseRegion,
+                          /*SupportRollback=*/false);
+    }
+  }
+  Rematerializations.erase(RootIdx);
+
+  LLVM_DEBUG({
+    rdbgs() << "** Rolled back rematerializations of " << printID(RootIdx)
+            << '\n';
+    --CallDepth;
+  });
+}
+
+void Rematerializer::rollback(unsigned RematIdx) {
+  assert(getReg(RematIdx).DefMI && !Rollbackable.contains(RematIdx) &&
+         "cannot rollback dead register");
+  const unsigned ParentRegIdx = getParentOf(RematIdx);
+  reviveRegIfDead(ParentRegIdx);
+
+  // Each transfer erases the region's entry from the rematerialization's use
+  // map, so walk a snapshot of the regions rather than the map itself.
+  SmallVector<unsigned, 4> UseRegions;
+  for (const auto &RegionAndUsers : Regs[RematIdx].Uses)
+    UseRegions.push_back(RegionAndUsers.first);
+  for (unsigned UseRegion : UseRegions) {
+    transferRegionUsers(RematIdx, ParentRegIdx, UseRegion,
+                        /*SupportRollback=*/false);
+  }
+}
+
+void Rematerializer::reviveRegIfDead(unsigned RootIdx) {
+  assert(!isRematerialization(RootIdx) && "cannot revive rematerialization");
+
+  Reg &Root = Regs[RootIdx];
+  if (!Root.Uses.empty()) {
+    // The register still exists, nothing to do.
+    LLVM_DEBUG(rdbgs() << printID(RootIdx) << " still exists\n");
+    return;
+  }
+
+  assert(Rollbackable.contains(RootIdx) && "not marked rollbackable");
+  assert(Root.DefMI && Root.DefMI->getOpcode() == TargetOpcode::DBG_VALUE &&
+         "not the right opcode");
+  // The original opcode is recovered from a rematerialization below, so at
+  // least one must exist.
+  assert(Rematerializations.contains(RootIdx) &&
+         !Rematerializations.at(RootIdx).empty() && "no remats");
+
+  LLVM_DEBUG({
+    rdbgs() << "Partially rolling back " << printID(RootIdx) << '\n';
+    ++CallDepth;
+  });
+
+  // Fully rematerialized dependencies need to be revived. All dependencies gain
+  // a new user.
+  for (const Reg::Dependency &Dep : Root.Dependencies) {
+    reviveRegIfDead(Dep.RegIdx);
+    Regs[Dep.RegIdx].addUser(Root.DefMI, Root.DefRegion);
+    LISUpdates.insert(Dep.RegIdx);
+  }
+
+  // Pick any rematerialization to retrieve the original opcode from.
+  unsigned RematIdx = *Rematerializations.at(RootIdx).begin();
+  Root.DefMI->setDesc(getReg(RematIdx).DefMI->getDesc());
+  // Restore the read register operands saved when the register was marked
+  // rollbackable.
+  for (const auto &[MOIdx, OrigReg] : Rollbackable.at(RootIdx))
+    Root.DefMI->getOperand(MOIdx).setReg(OrigReg);
+  Rollbackable.erase(RootIdx);
+  LISUpdates.insert(RootIdx);
+
+  LLVM_DEBUG({
+    rdbgs() << "** Partially rolled back " << printID(RootIdx) << " @ ";
+    LIS.getInstructionIndex(*Root.DefMI).print(dbgs());
+    dbgs() << '\n';
+    --CallDepth;
+  });
+}
+
+void Rematerializer::transferUser(unsigned FromRegIdx, unsigned ToRegIdx,
+                                  MachineInstr &UserMI, bool SupportRollback) {
+  // Rewrites the user's operands and, if the user is itself rematerializable,
+  // its dependency on the source register.
+  transferUserInternal(FromRegIdx, ToRegIdx, UserMI);
+  // Look the region up with at() rather than operator[], which would silently
+  // insert a default entry for an instruction unknown to the rematerializer.
+  const unsigned UserRegion = MIRegion.at(&UserMI);
+  Regs[FromRegIdx].eraseUser(&UserMI, UserRegion);
+  Regs[ToRegIdx].addUser(&UserMI, UserRegion);
+  // The source register may have lost its last user.
+  deleteRegIfUnused(FromRegIdx, SupportRollback);
+}
+
+void Rematerializer::transferRegionUsers(unsigned FromRegIdx, unsigned ToRegIdx,
+                                         unsigned UseRegion,
+                                         bool SupportRollback) {
+  auto &SrcUses = Regs[FromRegIdx].Uses;
+  auto RegionIt = SrcUses.find(UseRegion);
+  if (RegionIt != SrcUses.end()) {
+    // Rewrite every user in the region first, then move the whole set of
+    // users over to the destination register in one go.
+    const Reg::RegionUsers &Movers = RegionIt->getSecond();
+    for (MachineInstr *Mover : Movers)
+      transferUserInternal(FromRegIdx, ToRegIdx, *Mover);
+    Regs[ToRegIdx].addUsers(Movers, UseRegion);
+    SrcUses.erase(UseRegion);
+    // The source register may have lost its last user.
+    deleteRegIfUnused(FromRegIdx, SupportRollback);
+  }
+}
+
+void Rematerializer::transferUserInternal(unsigned FromRegIdx,
+                                          unsigned ToRegIdx,
+                                          MachineInstr &UserMI) {
+  assert(MIRegion.contains(&UserMI) && "unknown user");
+  assert(getReg(FromRegIdx).Uses.at(MIRegion.at(&UserMI)).contains(&UserMI) &&
+         "not a user");
+  assert(FromRegIdx != ToRegIdx && "identical registers");
+  assert(getParentOrSelf(FromRegIdx) == getParentOrSelf(ToRegIdx) &&
+         "unrelated registers");
+
+  LLVM_DEBUG(rdbgs() << "User transfer from " << printID(FromRegIdx) << " to "
+                     << printID(ToRegIdx) << ": " << printUser(&UserMI)
+                     << '\n');
+
+  // Make the user read the destination register; both registers' live
+  // intervals need to be recomputed as a result.
+  const Register SrcReg = getReg(FromRegIdx).getDefReg();
+  const Register DstReg = getReg(ToRegIdx).getDefReg();
+  UserMI.substituteRegister(SrcReg, DstReg, 0, TRI);
+  LISUpdates.insert(FromRegIdx);
+  LISUpdates.insert(ToRegIdx);
+
+  // Only rematerializable users track dependencies that need updating.
+  const unsigned UserRegIdx = getDefRegIdx(UserMI);
+  if (UserRegIdx == NoReg)
+    return;
+
+  // The user's dependency on the source register must now point to the
+  // destination register.
+  auto &UserDeps = Regs[UserRegIdx].Dependencies;
+  auto *DepIt = llvm::find_if(UserDeps, [FromRegIdx](const Reg::Dependency &D) {
+    return D.RegIdx == FromRegIdx;
+  });
+  if (DepIt == UserDeps.end())
+    llvm_unreachable("broken dependency");
+  DepIt->RegIdx = ToRegIdx;
+}
+
+void Rematerializer::updateLiveIntervals() {
+  // An unrematerializable operand register may be read by several updated
+  // registers; its interval is recomputed only the first time it is seen.
+  DenseSet<Register> SeenUnrematRegs;
+  for (unsigned RegIdx : LISUpdates) {
+    const Reg &UpdateReg = getReg(RegIdx);
+    // Parenthesized so that the message applies to the whole condition.
+    assert((UpdateReg.DefMI || Rollbackable.contains(RegIdx)) &&
+           "dead register");
+
+    // Recompute the register's own interval from scratch.
+    Register DefReg = UpdateReg.getDefReg();
+    if (LIS.hasInterval(DefReg))
+      LIS.removeInterval(DefReg);
+    LIS.createAndComputeVirtRegInterval(DefReg);
+
+    LLVM_DEBUG({
+      rdbgs() << "Re-computed interval for " << printID(RegIdx) << ": ";
+      LIS.getInterval(DefReg).print(dbgs());
+      rdbgs() << '\n' << printRegUsers(RegIdx);
+    });
+
+    // Update intervals for unrematerializable operands.
+    for (unsigned MOIdx : getUnrematableOprds(RegIdx)) {
+      Register UnrematReg = UpdateReg.DefMI->getOperand(MOIdx).getReg();
+      if (!SeenUnrematRegs.insert(UnrematReg).second)
+        continue;
+      LIS.removeInterval(UnrematReg);
+      LIS.createAndComputeVirtRegInterval(UnrematReg);
+      LLVM_DEBUG(
+          dbgs() << " Re-computed interval for register "
+                 << printReg(UnrematReg, &TRI,
+                             UpdateReg.DefMI->getOperand(MOIdx).getSubReg(),
+                             &MRI)
+                 << '\n');
+    }
+  }
+  // All batched updates have been performed.
+  LISUpdates.clear();
+}
+
+void Rematerializer::commitRematerializations() {
+  // Every key of the map is a register kept around only to support rollback;
+  // delete them all, then forget the rollback information.
+  for (const auto &RollbackEntry : Rollbackable)
+    deleteReg(RollbackEntry.first);
+  Rollbackable.clear();
+}
+
+bool Rematerializer::isMOAvailableAtUses(const MachineOperand &MO,
+                                         ArrayRef<SlotIndex> Uses) const {
+  // Trivially available when there is nothing to check against.
+  if (Uses.empty())
+    return true;
+
+  // Lanes read by the operand: those of its subregister index if it has one,
+  // all lanes of the virtual register otherwise.
+  const Register MOReg = MO.getReg();
+  LaneBitmask Mask;
+  if (unsigned SubIdx = MO.getSubReg())
+    Mask = TRI.getSubRegIndexLaneMask(SubIdx);
+  else
+    Mask = MRI.getMaxLaneMaskForVReg(MOReg);
+
+  // Value of the register at the early-clobber register slot of the
+  // instruction holding the operand.
+  const LiveInterval &LI = LIS.getInterval(MOReg);
+  const SlotIndex MOSlot =
+      LIS.getInstructionIndex(*MO.getParent()).getRegSlot(true);
+  const VNInfo *DefVN = LI.getVNInfoAt(MOSlot);
+
+  // That same value must be available at every use.
+  return llvm::all_of(Uses, [&](SlotIndex Use) {
+    return isAvailableAtUse(DefVN, Mask, Use, LI);
+  });
+}
+
+/// Looks, among the rematerializations attached to the parent of \p RegIdx (or
+/// to \p RegIdx itself), for one defined in \p Region strictly before slot
+/// \p Before that still has users. When several qualify, the one defined the
+/// latest is returned; NoReg is returned when none does.
+unsigned Rematerializer::findRematInRegion(unsigned RegIdx, unsigned Region,
+                                           SlotIndex Before) const {
+  auto RematsIt = Rematerializations.find(getParentOrSelf(RegIdx));
+  if (RematsIt == Rematerializations.end())
+    return NoReg;
+
+  unsigned ClosestIdx = NoReg;
+  SlotIndex ClosestSlot;
+  for (unsigned CandidateIdx : RematsIt->getSecond()) {
+    const Reg &Candidate = getReg(CandidateIdx);
+    if (Candidate.DefRegion != Region || Candidate.Uses.empty())
+      continue;
+
+    SlotIndex CandidateSlot =
+        LIS.getInstructionIndex(*Candidate.DefMI).getRegSlot();
+    // Must be defined before the requested position...
+    if (!(CandidateSlot < Before))
+      continue;
+    // ...and later than the best candidate found so far, if any.
+    if (ClosestIdx != NoReg && !(CandidateSlot > ClosestSlot))
+      continue;
+
+    ClosestSlot = CandidateSlot;
+    ClosestIdx = CandidateIdx;
+  }
+  return ClosestIdx;
+}
+
+/// Deletes register \p RootIdx if it has no users, then recursively attempts
+/// the same on each of its dependencies (which just lost a user).
+///
+/// With \p SupportRollback the defining instruction is kept in the MIR: its
+/// read register operands are replaced with the null register (the originals
+/// are recorded in Rollbackable) and its opcode is changed to DBG_VALUE.
+/// Without it the instruction is erased through deleteReg().
+///
+/// \returns true if the register had no users and was handled, false
+/// otherwise.
+bool Rematerializer::deleteRegIfUnused(unsigned RootIdx, bool SupportRollback) {
+  Reg &Root = Regs[RootIdx];
+  if (!Root.Uses.empty())
+    return false;
+  LLVM_DEBUG({
+    rdbgs() << "Deleting " << printID(RootIdx) << " with no users\n";
+    ++CallDepth;
+  });
+
+  // Read the defined register now: deleteReg() below nulls out Root.DefMI.
+  Register DefReg = Root.getDefReg();
+  for (const Reg::Dependency &Dep : Root.Dependencies) {
+    LLVM_DEBUG(rdbgs() << "Deleting user from " << printID(Dep.RegIdx) << "\n");
+    Regs[Dep.RegIdx].eraseUser(Root.DefMI, Root.DefRegion);
+    deleteRegIfUnused(Dep.RegIdx, SupportRollback);
+  }
+
+  LIS.removeInterval(DefReg);
+  LISUpdates.erase(RootIdx);
+  if (SupportRollback) {
+    // Replace all read registers with the null one to prevent issues in live
+    // interval calculations. Store mappings between operand indices and
+    // original registers for potential rollback.
+    DenseMap<unsigned, Register> &RegMap =
+        Rollbackable.try_emplace(RootIdx).first->getSecond();
+    for (auto [Idx, MO] : enumerate(Root.DefMI->operands())) {
+      if (MO.isReg() && MO.readsReg()) {
+        RegMap.insert({Idx, MO.getReg()});
+        MO.setReg(Register());
+      }
+    }
+    Root.DefMI->setDesc(TII.get(TargetOpcode::DBG_VALUE));
+  } else {
+    deleteReg(RootIdx);
+  }
+  if (isRematerialization(RootIdx)) {
+    // Rematerializations is keyed by the *parent* register index; the entry
+    // to prune when its set becomes empty is therefore the parent's, not
+    // RootIdx's (which is never a key of that map).
+    const unsigned ParentIdx = getParentOf(RootIdx);
+    SmallDenseSet<unsigned, 4> &Remats = Rematerializations.at(ParentIdx);
+    assert(Remats.contains(RootIdx) && "broken link between remat and parent");
+    Remats.erase(RootIdx);
+    if (Remats.empty())
+      Rematerializations.erase(ParentIdx);
+  }
+  LLVM_DEBUG(--CallDepth);
+  return true;
+}
+
+/// Erases the instruction defining register \p RegIdx from the MIR and from
+/// the rematerializer's and the LIS' bookkeeping, and marks the register as
+/// dead by clearing its DefMI.
+void Rematerializer::deleteReg(unsigned RegIdx) {
+  Reg &Dead = Regs[RegIdx];
+  MachineInstr *DeadMI = Dead.DefMI;
+  assert(DeadMI && "register was already deleted");
+
+  // Only the first instruction of a region needs care: the upper region
+  // boundary is never considered rematerializable, so it cannot be the
+  // instruction being deleted.
+  RegionBoundaries &Bounds = Regions[Dead.DefRegion];
+  if (Bounds.first == DeadMI)
+    Bounds.first = std::next(MachineBasicBlock::iterator(DeadMI));
+
+  LIS.RemoveMachineInstrFromMaps(*DeadMI);
+  DeadMI->eraseFromParent();
+  // The pointer is only used as a map key here, never dereferenced.
+  MIRegion.erase(DeadMI);
+  Dead.DefMI = nullptr;
+}
+
+/// (Re)builds the rematerializer's view of the function: maps every
+/// instruction of every region to its region index, then collects
+/// rematerializable registers region by region.
+///
+/// \returns true if at least one rematerializable register was found.
+bool Rematerializer::analyze() {
+  MIRegion.clear();
+  Regs.clear();
+  // Kept in lockstep with Regs (collectRegs() asserts both have the same
+  // size), so it has to be reset together with it.
+  UnrematableOprds.clear();
+  RegToIdx.clear();
+  LISUpdates.clear();
+  Rollbackable.clear();
+  // NOTE(review): Parents and Rematerializations also hold register indices
+  // and are not reset here -- confirm whether analyze() may be called more
+  // than once on the same object.
+  if (Regions.empty())
+    return false;
+
+  // Maps each basic block number to regions that are part of the BB.
+  DenseMap<unsigned, SmallVector<unsigned, 4>> RegionsPerBlock;
+
+  const unsigned NumRegions = Regions.size();
+  for (unsigned I = 0; I < NumRegions; ++I) {
+    RegionBoundaries Region = Regions[I];
+    for (auto MI = Region.first; MI != Region.second; ++MI)
+      MIRegion.insert({&*MI, I});
+    MachineBasicBlock *MBB = Region.first->getParent();
+    // The instruction right after the region (if any) is mapped to the region
+    // as well.
+    if (Region.second != MBB->end())
+      MIRegion.insert({&*Region.second, I});
+    RegionsPerBlock[MBB->getNumber()].push_back(I);
+  }
+
+  // Visit regions in dominator tree pre-order to ensure that regions defining
+  // registers come before regions using them.
+  MachineDominatorTree MDT(MF);
+  for (MachineDomTreeNode *MBB : depth_first(&MDT)) {
+    auto MBBRegions = RegionsPerBlock.find(MBB->getBlock()->getNumber());
+    if (MBBRegions == RegionsPerBlock.end())
+      continue;
+    // Pick the iteration direction with an explicit branch. A conditional
+    // expression mixing the vector with its reverse() adaptor forces both
+    // arms to the reversed-range type, which does not yield a valid range for
+    // the in-order arm.
+    const SmallVector<unsigned, 4> &BlockRegions = MBBRegions->getSecond();
+    if (RegionsTopDown) {
+      for (unsigned I : BlockRegions)
+        collectRegs(I);
+    } else {
+      for (unsigned I : reverse(BlockRegions))
+        collectRegs(I);
+    }
+  }
+
+  LLVM_DEBUG({
+    for (unsigned I = 0, E = getNumRegs(); I < E; ++I)
+      dbgs() << printTree(I) << '\n';
+  });
+  return !Regs.empty();
+}
+
+/// Registers every rematerializable register defined in region \p DefRegion,
+/// along with its users, its rematerializable dependencies, and the operand
+/// indices of its unrematerializable register dependencies.
+void Rematerializer::collectRegs(unsigned DefRegion) {
+  // Instructions are walked in program order so that, within one region, a
+  // register used to define another rematerializable register is always
+  // registered before the register depending on it.
+  const RegionBoundaries Bounds = Regions[DefRegion];
+  for (MachineInstr &DefMI : make_range(Bounds.first, Bounds.second)) {
+    if (!isMIRematerializable(DefMI))
+      continue;
+
+    Reg &Candidate = Regs.emplace_back();
+    Candidate.DefMI = &DefMI;
+    Candidate.DefRegion = DefRegion;
+    const Register DefReg = Candidate.getDefReg();
+    if (unsigned SubIdx = DefMI.getOperand(0).getSubReg())
+      Candidate.Mask = TRI.getSubRegIndexLaneMask(SubIdx);
+    else
+      Candidate.Mask = MRI.getMaxLaneMaskForVReg(DefReg);
+
+    // Gather direct users, whether or not they are rematerializable
+    // themselves. A user outside of every region (only lone MI terminators
+    // can be) means we cannot rematerialize next to it, so the whole
+    // candidate is rejected.
+    bool AllUsersInRegions = true;
+    for (MachineInstr &UseMI : MRI.use_nodbg_instructions(DefReg)) {
+      auto UseRegionIt = MIRegion.find(&UseMI);
+      if (UseRegionIt == MIRegion.end()) {
+        AllUsersInRegions = false;
+        break;
+      }
+      Candidate.addUser(&UseMI, UseRegionIt->second);
+    }
+    // Registers without any user are not tracked either.
+    if (!AllUsersInRegions || Candidate.Uses.empty()) {
+      Regs.pop_back();
+      continue;
+    }
+
+    // Gather dependencies; a register read through several operands is only
+    // recorded for the first of them.
+    SmallDenseSet<Register, 4> SeenDepRegs;
+    SmallVector<unsigned, 2> &Unrematable = UnrematableOprds.emplace_back();
+    for (const auto &[MOIdx, MO] : enumerate(DefMI.operands())) {
+      Register DepReg = isRegDependency(MO);
+      if (!DepReg)
+        continue;
+      if (!SeenDepRegs.insert(DepReg).second)
+        continue;
+
+      auto DepIt = RegToIdx.find(DepReg);
+      if (DepIt == RegToIdx.end()) {
+        Unrematable.push_back(MOIdx);
+        continue;
+      }
+      Candidate.Dependencies.push_back(Reg::Dependency(MOIdx, DepIt->second));
+    }
+
+    // The candidate is accepted: make it discoverable by its defined register.
+    RegToIdx.insert({DefReg, Regs.size() - 1});
+  }
+
+  assert(Regs.size() == UnrematableOprds.size());
+}
+
+/// Returns whether \p MI can be treated as the defining instruction of a
+/// rematerializable register: the TII must deem it rematerializable, it must
+/// not use a physical register that is neither constant nor ignorable, and
+/// its first operand must be a virtual register with a single definition.
+bool Rematerializer::isMIRematerializable(const MachineInstr &MI) const {
+  if (!TII.isReMaterializable(MI))
+    return false;
+
+  // A physical register use blocks rematerialization unless the register is
+  // constant or the use is ignorable (e.g. implicit exec use on VALU
+  // instructions).
+  auto IsBlockingUse = [&](const MachineOperand &MO) {
+    const Register UseReg = MO.getReg();
+    return UseReg.isPhysical() && !MRI.isConstantPhysReg(UseReg) &&
+           !TII.isIgnorableUse(MO);
+  };
+  if (any_of(MI.all_uses(), IsBlockingUse))
+    return false;
+
+  // Only virtual registers with exactly one definition are supported.
+  const Register DefReg = MI.getOperand(0).getReg();
+  if (!DefReg.isVirtual())
+    return false;
+  return MRI.hasOneDef(DefReg);
+}
+
+/// Returns the index of the rematerializable register defined by \p MI's
+/// first operand, or NoReg if \p MI has no operands, its first operand is not
+/// a register or reads its register, or the register is not tracked.
+unsigned Rematerializer::getDefRegIdx(const MachineInstr &MI) const {
+  if (MI.getNumOperands() == 0)
+    return NoReg;
+
+  const MachineOperand &FirstMO = MI.getOperand(0);
+  if (!FirstMO.isReg() || FirstMO.readsReg())
+    return NoReg;
+
+  auto IdxIt = RegToIdx.find(FirstMO.getReg());
+  if (IdxIt != RegToIdx.end())
+    return IdxIt->second;
+  return NoReg;
+}
+
+// Creates a new register that is a rematerialization of register RegIdx,
+// inserting a copy of its defining instruction before InsertPos. Dependencies
+// lists, operand by operand, which register the new instruction must read; it
+// must be parallel to the source register's own dependency list (same size,
+// same operand indices). Returns the index of the new register.
+unsigned
+Rematerializer::createReg(unsigned RegIdx,
+ MachineBasicBlock::iterator InsertPos,
+ SmallVectorImpl<Reg::Dependency> &&Dependencies) {
+ // The new register is defined in the region InsertPos belongs to.
+ unsigned UseRegion = MIRegion.at(&*InsertPos);
+ unsigned NewRegIdx = Regs.size();
+
+ // Take the reference to the source register only after growing Regs, since
+ // emplace_back may reallocate the container.
+ Reg &NewReg = Regs.emplace_back();
+ Reg &FromReg = Regs[RegIdx];
+ NewReg.Mask = FromReg.Mask;
+ NewReg.DefRegion = UseRegion;
+ NewReg.Dependencies = std::move(Dependencies);
+
+ // Track rematerialization link between registers. Parents are always
+ // registers that existed originally, and rematerializations are always
+ // attached to them.
+ unsigned ParentIdx =
+ isRematerialization(RegIdx) ? getParentOf(RegIdx) : RegIdx;
+ Parents.push_back(ParentIdx);
+ Rematerializations[ParentIdx].insert(NewRegIdx);
+
+ // Use the TII to rematerialize the defining instruction with a new defined
+ // register.
+ Register NewDefReg = MRI.cloneVirtualRegister(FromReg.getDefReg());
+ TII.reMaterialize(*InsertPos->getParent(), InsertPos, NewDefReg, 0,
+ *FromReg.DefMI);
+ // The rematerialized instruction is taken to be the one right before
+ // InsertPos.
+ NewReg.DefMI = &*std::prev(InsertPos);
+ RegToIdx.insert({NewDefReg, NewRegIdx});
+
+ // Update the DAG.
+ RegionBoundaries &Bounds = Regions[UseRegion];
+ // If the instruction was inserted right before the region's first
+ // instruction, it becomes the region's new first instruction.
+ if (Bounds.first == std::next(MachineBasicBlock::iterator(NewReg.DefMI)))
+ Bounds.first = NewReg.DefMI;
+ LIS.InsertMachineInstrInMaps(*NewReg.DefMI);
+ MIRegion.emplace_or_assign(NewReg.DefMI, UseRegion);
+ LISUpdates.insert(NewRegIdx);
+
+ // Replace dependencies as needed in the rematerialized MI. All dependencies
+ // of the latter gain a new user.
+ auto ZipedDeps = zip_equal(FromReg.Dependencies, NewReg.Dependencies);
+ for (const auto &[OldDep, NewDep] : ZipedDeps) {
+ assert(OldDep.MOIdx == NewDep.MOIdx && "operand mismatch");
+ LLVM_DEBUG(rdbgs() << " Operand #" << OldDep.MOIdx << ": "
+ << printID(OldDep.RegIdx) << " -> "
+ << printID(NewDep.RegIdx) << '\n');
+
+ Reg &NewDepReg = Regs[NewDep.RegIdx];
+ // Only rewrite the operand when the dependency actually changed; the
+ // register to replace is read from the source instruction's operand.
+ if (OldDep.RegIdx != NewDep.RegIdx) {
+ Register OldDefReg = FromReg.DefMI->getOperand(OldDep.MOIdx).getReg();
+ NewReg.DefMI->substituteRegister(OldDefReg, NewDepReg.getDefReg(), 0,
+ TRI);
+ LISUpdates.insert(OldDep.RegIdx);
+ }
+ NewDepReg.addUser(NewReg.DefMI, UseRegion);
+ LISUpdates.insert(NewDep.RegIdx);
+ }
+
+ LLVM_DEBUG({
+ rdbgs() << "** Rematerialized " << printID(RegIdx) << " as "
+ << printRematReg(NewRegIdx) << '\n';
+ });
+ return NewRegIdx;
+}
+
+/// Returns the users of this register in region \p UseRegion that have the
+/// lowest and the highest slot index according to \p LIS, in that order. Both
+/// pointers are null when the register has no user in the region.
+std::pair<MachineInstr *, MachineInstr *>
+Rematerializer::Reg::getRegionUseBounds(unsigned UseRegion,
+                                        const LiveIntervals &LIS) const {
+  auto UsersIt = Uses.find(UseRegion);
+  if (UsersIt == Uses.end())
+    return {nullptr, nullptr};
+  const auto &Users = UsersIt->getSecond();
+  assert(!Users.empty() && "empty userset in region");
+
+  MachineInstr *FirstMI = nullptr, *LastMI = nullptr;
+  SlotIndex FirstIndex, LastIndex;
+  for (MachineInstr *UserMI : Users) {
+    const SlotIndex UserIndex = LIS.getInstructionIndex(*UserMI);
+    if (!FirstMI) {
+      // The first user visited seeds both bounds.
+      FirstMI = LastMI = UserMI;
+      FirstIndex = LastIndex = UserIndex;
+      continue;
+    }
+    if (UserIndex < FirstIndex) {
+      FirstIndex = UserIndex;
+      FirstMI = UserMI;
+    } else if (UserIndex > LastIndex) {
+      LastIndex = UserIndex;
+      LastMI = UserMI;
+    }
+  }
+
+  return {FirstMI, LastMI};
+}
+
+/// Records \p MI as a user of this register in region \p Region.
+void Rematerializer::Reg::addUser(MachineInstr *MI, unsigned Region) {
+  Uses[Region].insert(MI);
+}
+
+/// Records all of \p NewUsers as users of this register in region \p Region.
+void Rematerializer::Reg::addUsers(const RegionUsers &NewUsers,
+                                   unsigned Region) {
+  Uses[Region].insert_range(NewUsers);
+}
+
+/// Removes user \p MI of this register from region \p Region. The region's
+/// entry is dropped altogether when \p MI was its only user, so that a region
+/// never maps to an empty user set.
+void Rematerializer::Reg::eraseUser(MachineInstr *MI, unsigned Region) {
+  assert(Uses.contains(Region) && "no user in region");
+  assert(Uses.at(Region).contains(MI) && "user not in region");
+  RegionUsers &Remaining = Uses[Region];
+  if (Remaining.size() != 1) {
+    Remaining.erase(MI);
+    return;
+  }
+  Uses.erase(Region);
+}
+
+/// Returns a printable listing register \p RootIdx together with all its
+/// transitive dependencies, deepest dependencies first and the root last,
+/// followed by the root's users.
+Printable Rematerializer::printTree(unsigned RootIdx) const {
+  return Printable([&, RootIdx](raw_ostream &OS) {
+    // Maximum depth at which each register is reached from the root.
+    DenseMap<unsigned, unsigned> RegDepths;
+    std::function<void(unsigned, unsigned)> Visit = [&](unsigned RegIdx,
+                                                        unsigned Depth) {
+      const unsigned KnownDepth = RegDepths.lookup_or(RegIdx, Depth);
+      RegDepths.emplace_or_assign(RegIdx, std::max(KnownDepth, Depth));
+      for (const Reg::Dependency &Dep : getReg(RegIdx).Dependencies)
+        Visit(Dep.RegIdx, Depth + 1);
+    };
+    Visit(RootIdx, 0);
+
+    // Sort in decreasing depth order to print root at the bottom.
+    SmallVector<std::pair<unsigned, unsigned>> ByDepth(RegDepths.begin(),
+                                                       RegDepths.end());
+    sort(ByDepth, [](const auto &LHS, const auto &RHS) {
+      return LHS.second > RHS.second;
+    });
+
+    // The root itself is part of the list, hence the -1.
+    OS << printID(RootIdx) << " has " << ByDepth.size() - 1
+       << " dependencies\n";
+    for (const auto &[RegIdx, Depth] : ByDepth) {
+      const std::string Indent(2 * Depth, ' ');
+      const char Bullet = Depth ? '|' : '*';
+      OS << Indent << Bullet << ' '
+         << printRematReg(RegIdx, /*SkipRegions=*/Depth) << '\n';
+    }
+    OS << printRegUsers(RootIdx);
+  });
+}
+
+/// Returns a printable of the form "(<index>/<register>)[<def region>]" for
+/// register \p RegIdx; "<dead>" stands in for the register name when the
+/// register no longer has a defining instruction.
+Printable Rematerializer::printID(unsigned RegIdx) const {
+  return Printable([&, RegIdx](raw_ostream &OS) {
+    const Reg &R = getReg(RegIdx);
+    OS << '(' << RegIdx << '/';
+    if (const MachineInstr *DefMI = R.DefMI)
+      OS << printReg(R.getDefReg(), &TRI, DefMI->getOperand(0).getSubReg(),
+                     &MRI);
+    else
+      OS << "<dead>";
+    OS << ")[" << R.DefRegion << "]";
+  });
+}
+
+/// Returns a printable describing register \p RegIdx: unless \p SkipRegions
+/// is set, a summary of its defining region, the regions it is live through
+/// without being used in, and the regions it is used in; then its ID, its
+/// defining instruction and that instruction's slot index.
+Printable Rematerializer::printRematReg(unsigned RegIdx,
+                                        bool SkipRegions) const {
+  return Printable([&, RegIdx, SkipRegions](raw_ostream &OS) {
+    const Reg &PrintReg = getReg(RegIdx);
+    if (!SkipRegions) {
+      OS << printID(RegIdx) << " [" << PrintReg.DefRegion;
+      if (!PrintReg.Uses.empty()) {
+        assert(PrintReg.DefMI && "dead register cannot have uses");
+        const LiveInterval &LI = LIS.getInterval(PrintReg.getDefReg());
+
+        // Regions the register is live through but not used in come first.
+        bool NoLiveThrough = true;
+        for (const auto [I, Bounds] : enumerate(Regions)) {
+          if (Bounds.first == Bounds.second)
+            continue;
+          if (PrintReg.Uses.contains(I))
+            continue;
+          if (!LI.liveAt(LIS.getInstructionIndex(*Bounds.first)))
+            continue;
+          if (!LI.liveAt(LIS.getInstructionIndex(*std::prev(Bounds.second))
+                             .getRegSlot()))
+            continue;
+          OS << (NoLiveThrough ? " - " : ",") << I;
+          NoLiveThrough = false;
+        }
+        OS << (NoLiveThrough ? " --> " : " -> ");
+
+        // Then the comma-separated list of regions with users.
+        bool FirstUse = true;
+        for (const auto &RegionAndUsers : PrintReg.Uses) {
+          if (!FirstUse)
+            OS << ",";
+          OS << RegionAndUsers.first;
+          FirstUse = false;
+        }
+      }
+      OS << "] ";
+    }
+    OS << printID(RegIdx) << ' ';
+    const MachineInstr &DefMI = *PrintReg.DefMI;
+    DefMI.print(OS, /*IsStandalone=*/true, /*SkipOpers=*/false,
+                /*SkipDebugLoc=*/false, /*AddNewLine=*/false);
+    OS << " @ ";
+    LIS.getInstructionIndex(DefMI).print(OS);
+  });
+}
+
+/// Returns a printable listing every user of register \p RegIdx, one per
+/// line, across all regions.
+Printable Rematerializer::printRegUsers(unsigned RegIdx) const {
+  return Printable([&, RegIdx](raw_ostream &OS) {
+    for (const auto &[_, Users] : getReg(RegIdx).Uses) {
+      // Write to the stream the printable is sent to rather than to dbgs(),
+      // so the output lands in the right stream and in the right order.
+      for (MachineInstr *MI : Users)
+        OS << " User " << printUser(MI) << '\n';
+    }
+  });
+}
+
+/// Returns a printable describing user instruction \p MI: the ID of the
+/// rematerializable register it defines (or a "(-/-)[<region>]" placeholder
+/// if it defines none), the instruction itself, and its slot index.
+Printable Rematerializer::printUser(const MachineInstr *MI) const {
+  return Printable([&, MI](raw_ostream &OS) {
+    unsigned RegIdx = getDefRegIdx(*MI);
+    if (RegIdx != NoReg)
+      OS << printID(RegIdx);
+    else
+      OS << "(-/-)[" << MIRegion.at(MI) << ']';
+    OS << ' ';
+    MI->print(OS, /*IsStandalone=*/true, /*SkipOpers=*/false,
+              /*SkipDebugLoc=*/false, /*AddNewLine=*/false);
+    OS << " @ ";
+    // Print the slot index to OS like everything else, not to dbgs().
+    LIS.getInstructionIndex(*MI).print(OS);
+  });
+}
diff --git a/llvm/unittests/CodeGen/CMakeLists.txt b/llvm/unittests/CodeGen/CMakeLists.txt
index 80d10138d7bfe..2b27a765c93c5 100644
--- a/llvm/unittests/CodeGen/CMakeLists.txt
+++ b/llvm/unittests/CodeGen/CMakeLists.txt
@@ -41,6 +41,7 @@ add_llvm_unittest(CodeGenTests
RegAllocScoreTest.cpp
RegisterTest.cpp
PassManagerTest.cpp
+ RematerializerTest.cpp
ScalableVectorMVTsTest.cpp
SchedBoundary.cpp
SelectionDAGAddressAnalysisTest.cpp
diff --git a/llvm/unittests/CodeGen/RematerializerTest.cpp b/llvm/unittests/CodeGen/RematerializerTest.cpp
new file mode 100644
index 0000000000000..0f5789897e083
--- /dev/null
+++ b/llvm/unittests/CodeGen/RematerializerTest.cpp
@@ -0,0 +1,450 @@
+//===- RematerializerTest.cpp ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Rematerializer.h"
+#include "llvm/Analysis/CGSCCPassManager.h"
+#include "llvm/Analysis/LoopAnalysisManager.h"
+#include "llvm/CodeGen/MIRParser/MIRParser.h"
+#include "llvm/CodeGen/MachineDomTreeUpdater.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachinePassManager.h"
+#include "llvm/CodeGen/MachinePostDominators.h"
+#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Passes/PassBuilder.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/TargetSelect.h"
+#include "llvm/Target/TargetMachine.h"
+#include "gtest/gtest.h"
+#include <memory>
+
+using namespace llvm;
+
+/// Test fixture that parses a MIR snippet for an AMDGPU target and builds a
+/// Rematerializer over it, treating each machine basic block as one region.
+class RematerializerTest : public testing::Test {
+public:
+  LLVMContext Context;
+  std::unique_ptr<TargetMachine> TM;
+  std::unique_ptr<Module> M;
+  std::unique_ptr<MachineModuleInfo> MMI;
+  std::unique_ptr<MIRParser> MIR;
+  std::unique_ptr<SmallVector<Rematerializer::RegionBoundaries>> Regions;
+  std::unique_ptr<Rematerializer> Remater;
+
+  LoopAnalysisManager LAM;
+  MachineFunctionAnalysisManager MFAM;
+  FunctionAnalysisManager FAM;
+  CGSCCAnalysisManager CGAM;
+
+  ModulePassManager MPM;
+  FunctionPassManager FPM;
+  MachineFunctionPassManager MFPM;
+  ModuleAnalysisManager MAM;
+
+  static void SetUpTestCase() {
+    InitializeAllTargets();
+    InitializeAllTargetMCs();
+  }
+
+  /// Creates the target machine and registers all analyses. The test is
+  /// skipped when the AMDGPU target is not available.
+  void SetUp() override {
+    Triple TargetTriple("amdgcn--");
+    std::string Error;
+    const Target *T = TargetRegistry::lookupTarget("", TargetTriple, Error);
+    if (!T)
+      GTEST_SKIP();
+    TargetOptions Options;
+    TM = std::unique_ptr<TargetMachine>(T->createTargetMachine(
+        TargetTriple, "gfx950", "", Options, std::nullopt));
+    if (!TM)
+      GTEST_SKIP();
+    MMI = std::make_unique<MachineModuleInfo>(TM.get());
+
+    PassBuilder PB(TM.get());
+    PB.registerModuleAnalyses(MAM);
+    PB.registerCGSCCAnalyses(CGAM);
+    PB.registerFunctionAnalyses(FAM);
+    PB.registerLoopAnalyses(LAM);
+    PB.registerMachineFunctionAnalyses(MFAM);
+    PB.crossRegisterProxies(LAM, FAM, CGAM, MAM, &MFAM);
+    MAM.registerPass([&] { return MachineModuleAnalysis(*MMI); });
+  }
+
+  /// Parses \p MIRCode into the fixture's module and machine functions.
+  /// \returns false if any parsing step fails.
+  bool parseMIR(StringRef MIRCode) {
+    std::unique_ptr<MemoryBuffer> MBuffer = MemoryBuffer::getMemBuffer(MIRCode);
+    MIR = createMIRParser(std::move(MBuffer), Context);
+    if (!MIR)
+      return false;
+
+    // parseIRModule() returns null on a parsing error; report the failure
+    // instead of dereferencing a null module below.
+    M = MIR->parseIRModule();
+    if (!M)
+      return false;
+    M->setDataLayout(TM->createDataLayout());
+
+    if (MIR->parseMachineFunctions(*M, MAM)) {
+      M.reset();
+      return false;
+    }
+
+    return true;
+  }
+
+  /// Builds and analyzes a rematerializer for function \p FunName of the
+  /// previously parsed module. The \p MIR parameter is not used (and shadows
+  /// the fixture's parser member).
+  Rematerializer &getRematerializer(StringRef MIR, StringRef FunName) {
+    MachineFunction &MF =
+        FAM.getResult<MachineFunctionAnalysis>(*M->getFunction(FunName))
+            .getMF();
+    LiveIntervals &LIS = MFAM.getResult<LiveIntervalsAnalysis>(MF);
+
+    Regions = std::make_unique<SmallVector<Rematerializer::RegionBoundaries>>();
+    /// Each MBB is its own region. This wouldn't be how e.g., the scheduler
+    /// would do that but here we only want to test the rematerializer's API so
+    /// it is good enough.
+    for (auto MBB = MF.begin(), MBBEnd = MF.end(); MBB != MBBEnd; ++MBB)
+      Regions->push_back({MBB->begin(), MBB->end()});
+    Remater = std::make_unique<Rematerializer>(MF, *Regions,
+                                               /*RegionsTopDown=*/false, LIS);
+    Remater->analyze();
+    return *Remater;
+  }
+};
+
+using MBBRegionsVector = SmallVector<SchedRegion, 16>;
+
+/// Asserts that region RegionIdx contains RegionSize instructions.
+#define ASSERT_REGION_SIZE(RegionIdx, RegionSize) \
+ { \
+ const auto &Region = (*Regions)[RegionIdx]; \
+ ASSERT_EQ(std::distance(Region.first, Region.second), RegionSize); \
+ }
+
+/// Asserts that regions have sizes RegionSizes, which must be an iterable
+/// object with the same number of elements as the number of regions.
+#define ASSERT_REGION_SIZES(RegionSizes) \
+ { \
+ ASSERT_EQ(RegionSizes.size(), Regions->size()); \
+ for (const auto [RegionIdx, Size] : enumerate(RegionSizes)) \
+ ASSERT_REGION_SIZE(RegionIdx, Size); \
+ }
+
+/// Asserts that register RegIdx in the rematerializer has a total of N users.
+#define ASSERT_NUM_USERS(RegIdx, N) \
+ { \
+ unsigned NumUsers = 0; \
+ for (const auto &[_, RegionUses] : Remater.getReg(RegIdx).Uses) \
+ NumUsers += RegionUses.size(); \
+ ASSERT_EQ(NumUsers, static_cast<unsigned>(N)); \
+ }
+
+/// Asserts that register RegIdx in the rematerializer has no users.
+#define ASSERT_NO_USERS(RegIdx) ASSERT_NUM_USERS(RegIdx, 0)
+
+/// Asserts that rematerialized register RegIdx has parent ParentIdx, is defined
+/// in region DefRegionIdx, and has a total of NumUsers users.
+#define ASSERT_REMAT(RegIdx, ParentIdx, DefRegionIdx, NumUsers) \
+ { \
+ const Rematerializer::Reg &RematReg = Remater.getReg(RegIdx); \
+ ASSERT_EQ(Remater.getParentOf(RegIdx), ParentIdx); \
+ ASSERT_EQ(RematReg.DefRegion, DefRegionIdx); \
+ ASSERT_NUM_USERS(RegIdx, NumUsers); \
+ }
+
+/// Rematerializes a tree of registers to a single user in different ways using
+/// the dependency reuse mechanics and the coarse-grained or more fine-grained
+/// API. Rolls back rematerializations in-between each different wave of
+/// rematerializations; the last wave is committed instead.
+TEST_F(RematerializerTest, TreeRematRollback) {
+ StringRef MIR = R"(
+name: TreeRematRollback
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+body: |
+ bb.0:
+ %0:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode
+ %1:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
+ %2:vgpr_32 = V_ADD_U32_e32 %0, %1, implicit $exec
+ %3:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode
+ %4:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
+
+ bb.1:
+ S_NOP 0, implicit %4
+ S_ENDPGM 0
+...
+)";
+ ASSERT_TRUE(parseMIR(MIR));
+ Rematerializer &Remater = getRematerializer(MIR, "TreeRematRollback");
+ Rematerializer::DependencyReuseInfo DRI;
+
+ // MBB/Region indices. The fixture makes each MBB its own region, so the
+ // initial sizes are the instruction counts of bb.0 and bb.1.
+ const unsigned MBB0 = 0, MBB1 = 1;
+ SmallVector<unsigned, 2> RegionSizes{5, 2};
+ ASSERT_REGION_SIZES(RegionSizes);
+
+ // Indices of rematerializable registers, named after the instructions
+ // defining %0 to %4 in bb.0, in that order.
+ unsigned NumRegs = 0;
+ const unsigned Cst0 = NumRegs++, Cst1 = NumRegs++, Add01 = NumRegs++,
+ Cst3 = NumRegs++, Add23 = NumRegs++;
+ ASSERT_EQ(Remater.getNumRegs(), NumRegs);
+
+ // Rematerialize Add23 with all transitive dependencies.
+ {
+ Remater.rematerializeToRegion(/*RootIdx=*/Add23, /*UseRegion=*/MBB1,
+ /*SupportRollback=*/true, DRI);
+ Remater.updateLiveIntervals();
+
+ // None of the original registers have any users, but they still are in the
+ // MIR because we enabled rollback support.
+ ASSERT_NO_USERS(Cst0);
+ ASSERT_NO_USERS(Cst1);
+ ASSERT_NO_USERS(Add01);
+ ASSERT_NO_USERS(Cst3);
+ ASSERT_NO_USERS(Add23);
+
+ // Copies of all MIs were inserted into the second MBB.
+ RegionSizes[MBB1] += 5;
+ ASSERT_REGION_SIZES(RegionSizes);
+ NumRegs += 5;
+ ASSERT_EQ(Remater.getNumRegs(), NumRegs);
+ }
+
+ // After rollback all rematerializations are removed from the MIR.
+ Remater.rollbackRematsOf(Add23);
+ RegionSizes[MBB1] -= 5;
+ ASSERT_REGION_SIZES(RegionSizes);
+
+ // Rematerialize Add23 only with its direct dependencies, reuse the rest.
+ {
+ DRI.clear().reuse(Cst0).reuse(Cst1);
+ Remater.rematerializeToRegion(/*RootIdx=*/Add23, /*UseRegion=*/MBB1,
+ /*SupportRollback=*/true, DRI);
+ Remater.updateLiveIntervals();
+
+ // Re-used registers have rematerializations as their single user (original
+ // users are dead). Rematerialized registers have no users.
+ ASSERT_NUM_USERS(Cst0, 1);
+ ASSERT_NUM_USERS(Cst1, 1);
+ ASSERT_NO_USERS(Add01);
+ ASSERT_NO_USERS(Cst3);
+ ASSERT_NO_USERS(Add23);
+
+ // Only immediate dependencies are copied to the second MBB.
+ RegionSizes[MBB1] += 3;
+ ASSERT_REGION_SIZES(RegionSizes);
+ NumRegs += 3;
+ ASSERT_EQ(Remater.getNumRegs(), NumRegs);
+ }
+
+ // After rollback all rematerializations are removed from the MIR.
+ Remater.rollbackRematsOf(Add23);
+ RegionSizes[MBB1] -= 3;
+ ASSERT_REGION_SIZES(RegionSizes);
+
+ // Rematerialize Add23 only with its direct dependencies as before, but
+ // with as fine-grained operations as possible.
+ {
+ // First instruction of the second region, i.e. the S_NOP using %4.
+ MachineInstr *NopMI = &*(*Regions)[MBB1].first;
+
+ DRI.clear().reuse(Cst0).reuse(Cst1);
+ const unsigned RematAdd01 =
+ Remater.rematerializeToPos(/*RootIdx=*/Add01, NopMI, DRI);
+ // This adds an additional user to the used constants, and does not change
+ // existing users for the original register.
+ ASSERT_NO_USERS(RematAdd01);
+ ASSERT_NUM_USERS(Add01, 1);
+ ASSERT_NUM_USERS(Cst0, 2);
+ ASSERT_NUM_USERS(Cst1, 2);
+
+ DRI.clear();
+ const unsigned RematCst3 =
+ Remater.rematerializeToPos(/*RootIdx=*/Cst3, NopMI, DRI);
+ // This does not change existing users for the original register.
+ ASSERT_NO_USERS(RematCst3);
+ ASSERT_NUM_USERS(Cst3, 1);
+
+ DRI.clear().useRemat(Add01, RematAdd01).useRemat(Cst3, RematCst3);
+ const unsigned RematAdd23 =
+ Remater.rematerializeToPos(/*RootIdx=*/Add23, NopMI, DRI);
+ // This adds a user to used rematerializations, and does not change existing
+ // users for the original register.
+ ASSERT_NO_USERS(RematAdd23);
+ ASSERT_NUM_USERS(Add23, 1);
+ ASSERT_NUM_USERS(RematAdd01, 1);
+ ASSERT_NUM_USERS(RematCst3, 1);
+
+ // Finally transfer the NOP user from the original to the rematerialized
+ // register.
+ Remater.transferUser(Add23, RematAdd23, *NopMI, /*SupportRollback=*/true);
+ ASSERT_NO_USERS(Add23);
+ ASSERT_NUM_USERS(RematAdd23, 1);
+
+ RegionSizes[MBB1] += 3;
+ ASSERT_REGION_SIZES(RegionSizes);
+ NumRegs += 3;
+ ASSERT_EQ(Remater.getNumRegs(), NumRegs);
+ }
+
+ // This time don't rollback; commit the rematerializations. This finally
+ // deletes unused registers in the first block. However the number of
+ // registers tracked by the rematerializer doesn't change.
+ Remater.updateLiveIntervals();
+ Remater.commitRematerializations();
+ RegionSizes[MBB0] -= 3;
+ ASSERT_REGION_SIZES(RegionSizes);
+ ASSERT_EQ(Remater.getNumRegs(), NumRegs);
+}
+
+/// Rematerializes a single register to multiple regions, checking that
+/// rematerializations are linked correctly to their parent and that the
+/// original register is deleted automatically when it no longer has any uses.
+TEST_F(RematerializerTest, MultiRegionsRemat) {
+ StringRef MIR = R"(
+name: MultiRegionsRemat
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+body: |
+ bb.0:
+ %0:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode
+
+ bb.1:
+ S_NOP 0, implicit %0, implicit %0
+
+ bb.2:
+ S_NOP 0, implicit %0
+ S_NOP 0, implicit %0
+
+ bb.3:
+ S_NOP 0, implicit %0
+ S_ENDPGM 0
+...
+)";
+ ASSERT_TRUE(parseMIR(MIR));
+ Rematerializer &Remater = getRematerializer(MIR, "MultiRegionsRemat");
+ Rematerializer::DependencyReuseInfo DRI;
+
+ // MBB/Region indices. Initial sizes are the instruction counts of each MBB.
+ const unsigned MBB0 = 0, MBB1 = 1, MBB2 = 2, MBB3 = 3;
+ SmallVector<unsigned, 2> RegionSizes{1, 1, 2, 2};
+ ASSERT_REGION_SIZES(RegionSizes);
+
+ // Indices of rematerializable registers.
+ const unsigned Cst0 = 0;
+ ASSERT_EQ(Remater.getNumRegs(), 1U);
+
+ // Rematerialization to MBB1. The single S_NOP there reads %0 twice but
+ // counts as one user.
+ const unsigned RematBB1 =
+ Remater.rematerializeToRegion(/*RootIdx=*/Cst0, /*UseRegion=*/MBB1,
+ /*SupportRollback=*/false, DRI);
+ ++RegionSizes[MBB1];
+ ASSERT_REGION_SIZES(RegionSizes);
+ ASSERT_REMAT(/*RegIdx=*/RematBB1, /*ParentIdx=*/Cst0, /*DefRegionIdx=*/MBB1,
+ /*NumUsers=*/1);
+
+ // Rematerialization to MBB2. Both S_NOPs there use the single
+ // rematerialization.
+ const unsigned RematBB2 =
+ Remater.rematerializeToRegion(/*RootIdx=*/Cst0, /*UseRegion=*/MBB2,
+ /*SupportRollback=*/false, DRI);
+ ++RegionSizes[MBB2];
+ ASSERT_REGION_SIZES(RegionSizes);
+ ASSERT_REMAT(/*RegIdx=*/RematBB2, /*ParentIdx=*/Cst0, /*DefRegionIdx=*/MBB2,
+ /*NumUsers=*/2);
+
+ // Rematerialization to MBB3. Rematerializing to the last original user
+ // deletes the original register.
+ const unsigned RematBB3 =
+ Remater.rematerializeToRegion(/*RootIdx=*/Cst0, /*UseRegion=*/MBB3,
+ /*SupportRollback=*/false, DRI);
+ --RegionSizes[MBB0];
+ ++RegionSizes[MBB3];
+ ASSERT_REGION_SIZES(RegionSizes);
+ ASSERT_REMAT(/*RegIdx=*/RematBB3, /*ParentIdx=*/Cst0, /*DefRegionIdx=*/MBB3,
+ /*NumUsers=*/1);
+
+ Remater.updateLiveIntervals();
+}
+
+/// Rematerializes a tree of registers with some unrematerializable operands to
+/// a final destination in two steps, creating rematerializations of
+/// rematerializations in the process. Makes sure that parents of
+/// rematerializations are always original registers.
+TEST_F(RematerializerTest, MultiStep) {
+ StringRef MIR = R"(
+name: MultiStep
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+body: |
+ bb.0:
+ %0:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode
+ %1:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit-def $m0
+ %2:vgpr_32 = V_ADD_U32_e32 %0, %1, implicit $exec
+ S_NOP 0, implicit %0
+
+ bb.1:
+ %3:vgpr_32 = V_ADD_U32_e32 %2, %2, implicit $exec
+
+ bb.2:
+ S_NOP 0, implicit %3
+ S_ENDPGM 0
+...
+)";
+ ASSERT_TRUE(parseMIR(MIR));
+ Rematerializer &Remater = getRematerializer(MIR, "MultiStep");
+ Rematerializer::DependencyReuseInfo DRI;
+
+ // MBB/Region indices. Initial sizes are the instruction counts of each MBB.
+ const unsigned MBB0 = 0, MBB1 = 1, MBB2 = 2;
+ SmallVector<unsigned, 2> RegionSizes{4, 1, 2};
+ ASSERT_REGION_SIZES(RegionSizes);
+
+ // Indices of rematerializable registers: %0, %2 and %3. %1 is not tracked
+ // (see below), which is why only three registers are expected.
+ unsigned NumRegs = 0;
+ const unsigned Cst0 = NumRegs++, Add01 = NumRegs++, Add22 = NumRegs++;
+ ASSERT_EQ(Remater.getNumRegs(), NumRegs);
+
+ // Rematerialize Add01 from the first to the second block along with its
+ // single rematerializable dependency (constant 0). The constant 1 has an
+ // implicit def that is non-ignorable so it cannot be rematerialized. The
+ // constant 0 remains in the first block because it has a user there, but the
+ // add is deleted.
+ Remater.rematerializeToRegion(/*RootIdx=*/Add01, /*UseRegion=*/MBB1,
+ /*SupportRollback=*/false, DRI);
+ const unsigned RematCst0 = NumRegs++, RematAdd01 = NumRegs++;
+ RegionSizes[MBB0] -= 1;
+ RegionSizes[MBB1] += 2;
+ ASSERT_REGION_SIZES(RegionSizes);
+ ASSERT_REMAT(/*RegIdx=*/RematCst0, /*ParentIdx=*/Cst0, /*DefRegionIdx=*/MBB1,
+ /*NumUsers=*/1);
+ ASSERT_REMAT(/*RegIdx=*/RematAdd01, /*ParentIdx=*/Add01,
+ /*DefRegionIdx=*/MBB1,
+ /*NumUsers=*/1);
+
+ // We are going to re-rematerialize a register so the LIS need to be
+ // up-to-date.
+ Remater.updateLiveIntervals();
+
+ // Rematerialize Add22 from the second to the third block, which will
+ // also indirectly rematerialize RematAdd01; make sure the parent of the
+ // latter's rematerialization is the original register, not RematAdd01.
+ DRI.reuse(RematCst0);
+ Remater.rematerializeToRegion(/*RootIdx=*/Add22, /*UseRegion=*/MBB2,
+ /*SupportRollback=*/false, DRI);
+ const unsigned RematRematAdd01 = NumRegs++, RematAdd22 = NumRegs++;
+ RegionSizes[MBB1] -= 2;
+ RegionSizes[MBB2] += 2;
+ ASSERT_REGION_SIZES(RegionSizes);
+ ASSERT_REMAT(/*RegIdx=*/RematRematAdd01, /*ParentIdx=*/Add01,
+ /*DefRegionIdx=*/MBB2,
+ /*NumUsers=*/1);
+ ASSERT_REMAT(/*RegIdx=*/RematAdd22, /*ParentIdx=*/Add22,
+ /*DefRegionIdx=*/MBB2,
+ /*NumUsers=*/1);
+
+ Remater.updateLiveIntervals();
+}
>From 1e9bd7335d686a842928da55bf69eeeb8f957e1c Mon Sep 17 00:00:00 2001
From: Lucas Ramirez <lucas.rami at proton.me>
Date: Wed, 21 Jan 2026 02:08:03 +0000
Subject: [PATCH 2/6] Format
---
llvm/lib/CodeGen/Rematerializer.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/CodeGen/Rematerializer.cpp b/llvm/lib/CodeGen/Rematerializer.cpp
index b7fbfbf9f7101..1f5f8dee9c0e4 100644
--- a/llvm/lib/CodeGen/Rematerializer.cpp
+++ b/llvm/lib/CodeGen/Rematerializer.cpp
@@ -227,8 +227,8 @@ void Rematerializer::transferUserInternal(unsigned FromRegIdx,
"unrelated registers");
LLVM_DEBUG(rdbgs() << "User transfer from " << printID(FromRegIdx) << " to "
- << printID(ToRegIdx) << ": " << printUser(&UserMI)
- << '\n');
+ << printID(ToRegIdx) << ": " << printUser(&UserMI)
+ << '\n');
UserMI.substituteRegister(getReg(FromRegIdx).getDefReg(),
getReg(ToRegIdx).getDefReg(), 0, TRI);
>From a9a0726bf12fac10cdb656836b938cd5794f0cd1 Mon Sep 17 00:00:00 2001
From: Lucas Ramirez <lucas.rami at proton.me>
Date: Fri, 30 Jan 2026 17:31:51 +0000
Subject: [PATCH 3/6] Clarify/Rephrase a lot of comments
---
llvm/include/llvm/CodeGen/Rematerializer.h | 188 ++++++++++++---------
llvm/lib/CodeGen/Rematerializer.cpp | 44 ++---
2 files changed, 134 insertions(+), 98 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/Rematerializer.h b/llvm/include/llvm/CodeGen/Rematerializer.h
index ff075a51e38f0..c54cb7814f742 100644
--- a/llvm/include/llvm/CodeGen/Rematerializer.h
+++ b/llvm/include/llvm/CodeGen/Rematerializer.h
@@ -28,21 +28,38 @@ namespace llvm {
/// following constraints.
/// 1. The register is virtual and has a single defining instruction.
/// 2. The single defining instruction is deemed rematerializable by the TII and
-/// has no non-constant and non-ignorable physical register use.
-/// 3. The register has at least one non-debug use that is inside or the
-/// boundary of a region.
+/// doesn't have any physical register use that is both non-constant and
+/// non-ignorable.
+/// 3. The register has at least one non-debug use that is inside a region or a
+/// region terminator.
///
/// Rematerializable registers (represented by \ref Rematerializer::Reg) form a
/// DAG of their own, with every register having incoming edges from all
-/// rematerializable registers which are read by the instruction defining it.
-/// Ignoring outgoing edges, each register can be seen as the root of its own
-/// tree within this DAG. The API uses dense unsigned integers starting at 0 to
-/// reference rematerializable registers. These indices are immutable i.e., even
-/// when registers are deleted their respective integer handle remain valid.
-/// Method which perform actual rematerializations should however be assumed to
+/// rematerializable registers which are read by the instruction defining it. It
+/// is possible to rematerialize registers with unrematerializable dependencies;
+/// however the latter are not considered part of this DAG since their
+/// position/identity never change and therefore do not need the same tracking.
+///
+/// Each register has a "dependency DAG" which is defined as the subset of nodes
+/// in the overall DAG that have at least one path to the register, which is
+/// called the "root" register in this context. Semantically, these nodes are
+/// the registers which are involved into the computation of the root register
+/// i.e., all of its transitive dependencies. We use the term "root" because all
+/// paths within the dependency DAG of a register terminate at it; however,
+/// there may be multiple paths between a non-root node and the root node, so a
+/// dependency DAG is not always a tree.
+///
+/// The API uses dense unsigned integers starting at 0 to reference
+/// rematerializable registers. These indices are immutable i.e., even when
+/// registers are deleted their respective integer handles remain valid. Methods
+/// which perform actual rematerializations should however be assumed to
/// invalidate addresses to \ref Rematerializer::Reg objects.
///
-/// The rematerializer supports rematerializing arbitrary complex trees of
+/// The API also uses dense unsigned integers starting at 0 to reference
+/// regions. These map directly to the indices of the corresponding regions in
+/// the \p Rematerializer::Regions vector passed during construction.
+///
+/// The rematerializer supports rematerializing arbitrary complex DAGs of
/// registers to regions where these registers are used, with the option of
/// re-using non-root registers or their previous rematerializations instead of
/// rematerializing them again. It also optionally supports rolling back
@@ -67,10 +84,10 @@ namespace llvm {
///
/// In its nomenclature, the rematerializer differentiates between "original
/// registers" (registers that were present when it analyzed the function) and
-/// rematerializations of these original registers. Rematerializations have a
-/// "parent" which is the original regiser they were rematerialized from
-/// (transitivity applies; a rematerialization and all of its own
-/// rematerializations have the same parent). Semantically, only original
+/// rematerializations of these original registers. Rematerializations have an
+/// "origin" which is the index of the original register they were
+/// rematerialized from (transitivity applies; a rematerialization and all its
+/// own rematerializations have the same origin). Semantically, only original
/// registers have rematerializations.
class Rematerializer {
public:
@@ -84,11 +101,12 @@ class Rematerializer {
///
/// A rematerializable register also has an arbitrary number of users in an
/// arbitrary number of regions, potentially including its own defining
- /// region. When user transfers make a register lose all its users, the
- /// rematerializer marks it for deletion, in which case its defining
- /// instruction either becomes nullptr (without rollback support) or its
- /// opcode is set to TargetOpcode::DBG_VALUE (with rollback support) until
- /// \ref Rematerializer::commitRematerializations is called.
+ /// region. When rematerializations lead to operand changes in users, a
+ /// register may find itself without any user left, at which point the
+ /// rematerializer marks it for deletion. Its defining instruction either
+ /// becomes nullptr (without rollback support) or its opcode is set to
+ /// TargetOpcode::DBG_VALUE (with rollback support) until \ref
+ /// Rematerializer::commitRematerializations is called.
struct Reg {
/// Single MI defining the rematerializable register.
MachineInstr *DefMI;
@@ -119,6 +137,7 @@ class Rematerializer {
/// Returns the rematerializable register from its defining instruction.
inline Register getDefReg() const {
assert(DefMI && "defining instruction was deleted");
+ assert(DefMI->getOperand(0).isDef() && "not a register def");
return DefMI->getOperand(0).getReg();
}
@@ -179,47 +198,65 @@ class Rematerializer {
}
inline unsigned getNumRegions() const { return Regions.size(); }
- inline bool isRematerialization(unsigned RegIdx) const {
+ /// Whether register \p RegIdx is a rematerialization of some original
+ /// register.
+ inline bool isRematerializedRegister(unsigned RegIdx) const {
assert(RegIdx < Regs.size() && "out of bounds");
return RegIdx >= UnrematableOprds.size();
}
- /// Returns the parent index of rematerializable register \p RegIdx.
- inline unsigned getParentOf(unsigned RematRegIdx) const {
- assert(isRematerialization(RematRegIdx) && "not a rematerialization");
- return Parents[RematRegIdx - UnrematableOprds.size()];
+ /// Returns the origin index of rematerializable register \p RegIdx.
+ inline unsigned getOriginOf(unsigned RematRegIdx) const {
+ assert(isRematerializedRegister(RematRegIdx) && "not a rematerialization");
+ return Origins[RematRegIdx - UnrematableOprds.size()];
}
- /// If \p RegIdx is a rematerialization, returns its parent's index. If it is
+ /// If \p RegIdx is a rematerialization, returns its origin's index. If it is
/// an original register's index, returns the same index.
- inline unsigned getParentOrSelf(unsigned RegIdx) const {
- if (isRematerialization(RegIdx))
- return getParentOf(RegIdx);
+ inline unsigned getOriginOrSelf(unsigned RegIdx) const {
+ if (isRematerializedRegister(RegIdx))
+ return getOriginOf(RegIdx);
return RegIdx;
}
/// Returns operand indices corresponding to unrematerializable operands for
/// any register \p RegIdx.
inline ArrayRef<unsigned> getUnrematableOprds(unsigned RegIdx) const {
- return UnrematableOprds[getParentOrSelf(RegIdx)];
+ return UnrematableOprds[getOriginOrSelf(RegIdx)];
}
- /// When rematerializating a register (called the "root register" in this
+ /// When rematerializing a register (called the "root" register in this
/// context) to a given position, we must decide what to do with all its
- /// dependencies; for each dependency we can either
+ /// rematerializable dependencies (for unrematerializable dependencies, we
+ /// have no choice but to re-use the same register). For each rematerializable
+ /// dependency we can either
/// 1. rematerialize it along with the register,
/// 2. re-use it as-is, or
/// 3. re-use a pre-existing rematerialization of it.
- /// In case (1), the same decision needs to be made for all of the
- /// dependency's dependencies (i.e., the root's transitive dependencies). In
- /// cases (2) and (3), transitive dependencies need not be examined.
+ /// In case 1, the same decision needs to be made for all of the dependency's
+ /// dependencies. In cases 2 and 3, the dependency's dependencies need not be
+ /// examined.
///
/// This struct allows to encode decisions of types (2) and (3) when
- /// rematerialization of all of the root's transitive dependencies is
- /// undesirable. During rematerialization, all of the root's transitive
- /// dependencies which are not marked as re-used in some way will be
- /// rematerialized along the root.
+ /// rematerialization of all of the root's dependency DAG is undesirable.
+ /// During rematerialization, registers in the root's dependency DAG which
+ /// have a path to the root made up exclusively of non-re-used registers will
+ /// be rematerialized along with the root.
struct DependencyReuseInfo {
- /// Maps registers that the root transitively depends on to their
- /// respective rematerialization to use for the rematerialization of the
- /// root.
+ /// Keys and values are rematerializable register indices.
+ ///
+ /// Before rematerialization, this only contains entries for non-root
+ /// registers of the root's dependency DAG which should not be
+ /// rematerialized i.e., for which an existing register should be used
+ /// instead. These map each such non-root register to either the same
+ /// register (case 2, \ref DependencyReuseInfo::reuse) or to a
+ /// rematerialization of the key register (case 3, \ref
+ /// DependencyReuseInfo::useRemat).
+ ///
+ /// After rematerialization, this contains additional entries for non-root
+ /// registers of the root's dependency DAG that needed to be rematerialized
+ /// along the root. These map each such non-root register to their
+ /// corresponding new rematerialization that is used in the rematerialized
+ /// root's dependency DAG. It follows that the difference in map size before
+ /// and after rematerialization indicates the number of non-root registers
+ /// that were rematerialized along the root.
SmallDenseMap<unsigned, unsigned, 4> DependencyMap;
DependencyReuseInfo &reuse(unsigned DepIdx) {
@@ -236,13 +273,12 @@ class Rematerializer {
}
};
- /// Rematerializes a register tree rooted at register \p RootIdx to a region
- /// \p UseRegion where it has at least one user, transfers all its users in
- /// the region to the new register, and returns the latter's index. Transitive
- /// dependencies of the root are rematerialized or re-used according to \p
- /// DRI. If \p SupportRollback is true, rematerializations of registers that
- /// lose all their users as a consequence of the rematerializations can later
- /// be rolled back.
+ /// Rematerializes register \p RootIdx just before its first user inside
+ /// region \p UseRegion, transfers all its users in the region to the new
+ /// register, and returns the latter's index. The root's dependency DAG is
+ /// rematerialized or re-used according to \p DRI. If \p SupportRollback is
+ /// true, rematerializations of registers that lose all their users as a
+ /// consequence of the rematerializations can later be rolled back.
///
/// When the method returns, \p DRI contains additional mappings of all
/// transitive dependencies that had to be rematerialized to their
@@ -253,9 +289,9 @@ class Rematerializer {
bool SupportRollback,
DependencyReuseInfo &DRI);
- /// Rematerializes a register tree rooted at register \p RootIdx to position
- /// \p InsertPos and returns the new register's index. Transitive dependencies
- /// of the root are rematerialized or re-used according to \p DRI.
+ /// Rematerializes register \p RootIdx to position \p InsertPos and returns
+ /// the new register's index. The root's dependency DAG is rematerialized or
+ /// re-used according to \p DRI.
///
/// When the method returns, \p DRI contains additional mappings of all
/// transitive dependencies that had to be rematerialized to their respective
@@ -275,7 +311,7 @@ class Rematerializer {
void rollbackRematsOf(unsigned RootIdx);
/// Rolls back register \p RematIdx (which must be a rematerialization)
- /// transfering all its users back to its parent. The latter is revived if it
+ /// transferring all its users back to its origin. The latter is revived if it
/// was fully rematerialized (this requires that rollback support was set at
/// that time).
void rollback(unsigned RematIdx);
@@ -289,7 +325,7 @@ class Rematerializer {
/// Transfers all users of register \p FromRegIdx in region \p UseRegion to \p
/// ToRegIdx, the latter of which must be a rematerialization of the former or
- /// have the same parent register. Users in \p UseRegion must be reachable
+ /// have the same origin register. Users in \p UseRegion must be reachable
/// from \p ToRegIdx. If \p SupportRollback is true, rematerializations of
/// registers that lose all their users as a consequence of the transfer can
/// later be rolled back.
@@ -298,7 +334,7 @@ class Rematerializer {
/// Transfers user \p UserMI from register \p FromRegIdx to \p ToRegIdx,
/// the latter of which must be a rematerialization of the former or have the
- /// same parent register. \p UserMI must be a direct user of \p FromRegIdx. \p
+ /// same origin register. \p UserMI must be a direct user of \p FromRegIdx. \p
/// UserMI must be reachable from \p ToRegIdx. If \p SupportRollback is true,
/// rematerializations of registers that lose all their users as a consequence
/// of the transfer can later be rolled back.
@@ -313,10 +349,10 @@ class Rematerializer {
/// support rollback.
void commitRematerializations();
- /// Determines whether register operand \p MO is available at all \p Uses
- /// according to its current live interval.
- bool isMOAvailableAtUses(const MachineOperand &MO,
- ArrayRef<SlotIndex> Uses) const;
+ /// Determines whether (sub-)register operand \p MO has the same value at
+ /// all \p Uses as at \p MO. This implies that it is also available at all \p
+ /// Uses according to its current live interval.
+ bool isMOIdenticalAtUses(MachineOperand &MO, ArrayRef<SlotIndex> Uses) const;
/// Finds the closest rematerialization of register \p RegIdx in region \p
/// Region that exists before slot \p Before. If no such rematerialization
@@ -324,7 +360,7 @@ class Rematerializer {
unsigned findRematInRegion(unsigned RegIdx, unsigned Region,
SlotIndex Before) const;
- Printable printTree(unsigned RootIdx) const;
+ Printable printDependencyDAG(unsigned RootIdx) const;
Printable printID(unsigned RegIdx) const;
Printable printRematReg(unsigned RegIdx, bool SkipRegions = false) const;
Printable printRegUsers(unsigned RegIdx) const;
@@ -344,18 +380,18 @@ class Rematerializer {
/// deleted. Indices inside this vector serve as handles for rematerializable
/// registers.
SmallVector<Reg> Regs;
- /// For each original register, stores indices of unrematerializable read
- /// register operands. This doesn't change after the initial collection
- /// period, so the size of the vector indicates the number of original
- /// registers.
+ /// For each original register, stores indices of its read register operands
+ /// which are unrematerializable. This doesn't change after the initial
+ /// collection period, so the size of the vector indicates the number of
+ /// original registers.
SmallVector<SmallVector<unsigned, 2>> UnrematableOprds;
/// Indicates the original register index of each rematerialization, in the
/// order in which they are created. The size of the vector indicates the
/// total number of rematerializations ever created, including those that were
/// deleted or rolled back.
- SmallVector<unsigned> Parents;
+ SmallVector<unsigned> Origins;
/// Maps original register indices to their currently alive
- /// rematerializations. In practive most registers don't have
+ /// rematerializations. In practice most registers don't have
/// rematerializations so this is represented as a map to lower memory cost.
DenseMap<unsigned, SmallDenseSet<unsigned, 4>> Rematerializations;
@@ -363,9 +399,8 @@ class Rematerializer {
/// data in the \ref Regs vector. This includes registers that no longer exist
/// in the MIR.
DenseMap<Register, unsigned> RegToIdx;
- /// Maps all MIs (except lone terminators, which are not part of any region)
- /// to their parent region. Non-lone terminators are considered part of the
- /// region they delimitate.
+ /// Maps all MIs to their parent region. Region terminators are considered
+ /// part of the region they end.
DenseMap<MachineInstr *, unsigned> MIRegion;
/// Set of registers whose live-range may have changed during past
/// rematerializations/rollbacks.
@@ -387,14 +422,15 @@ class Rematerializer {
/// Rematerializes register \p RegIdx at \p InsertPos, adding the new
/// rematerializable register to the backing vector \ref Regs and returning
/// its index inside the vector. Sets the new registers' rematerializable
- /// dependencies to \p Dependencies and its unrematerializable dependencies to
- /// the same as \p RegIdx. The new register initially has no user, it is
- /// assumed that the caller will give it at least one after its creation.
- /// Since the method appends to \ref Regs, references to elements within it
- /// should be considered invalidated across calls to this method unless the
- /// vector can be guaranteed to have enough space for an extra element.
- unsigned createReg(unsigned RegIdx, MachineBasicBlock::iterator InsertPos,
- SmallVectorImpl<Reg::Dependency> &&Dependencies);
+ /// dependencies to \p Dependencies (these are assumed to already exist in the
+ /// MIR) and its unrematerializable dependencies to the same as \p RegIdx. The
+ /// new register initially has no user. Since the method appends to \ref Regs,
+ /// references to elements within it should be considered invalidated across
+ /// calls to this method unless the vector can be guaranteed to have enough
+ /// space for an extra element.
+ unsigned rematerializeReg(unsigned RegIdx,
+ MachineBasicBlock::iterator InsertPos,
+ SmallVectorImpl<Reg::Dependency> &&Dependencies);
/// Internal version of \ref Rematerializer::transferUser that doesn't update
/// register users.
diff --git a/llvm/lib/CodeGen/Rematerializer.cpp b/llvm/lib/CodeGen/Rematerializer.cpp
index 1f5f8dee9c0e4..01c8a6f0c807b 100644
--- a/llvm/lib/CodeGen/Rematerializer.cpp
+++ b/llvm/lib/CodeGen/Rematerializer.cpp
@@ -105,7 +105,7 @@ Rematerializer::rematerializeToPos(unsigned RootIdx,
}
LLVM_DEBUG(--CallDepth);
- return createReg(RootIdx, InsertPos, std::move(NewDeps));
+ return rematerializeReg(RootIdx, InsertPos, std::move(NewDeps));
}
void Rematerializer::rollbackRematsOf(unsigned RootIdx) {
@@ -139,16 +139,17 @@ void Rematerializer::rollbackRematsOf(unsigned RootIdx) {
void Rematerializer::rollback(unsigned RematIdx) {
assert(getReg(RematIdx).DefMI && !Rollbackable.contains(RematIdx) &&
"cannot rollback dead register");
- const unsigned ParentRegIdx = getParentOf(RematIdx);
- reviveRegIfDead(ParentRegIdx);
+ const unsigned OriginRegIdx = getOriginOf(RematIdx);
+ reviveRegIfDead(OriginRegIdx);
for (const auto &[UseRegion, RegionUsers] : Regs[RematIdx].Uses) {
- transferRegionUsers(RematIdx, ParentRegIdx, UseRegion,
+ transferRegionUsers(RematIdx, OriginRegIdx, UseRegion,
/*SupportRollback=*/false);
}
}
void Rematerializer::reviveRegIfDead(unsigned RootIdx) {
- assert(!isRematerialization(RootIdx) && "cannot revive rematerialization");
+ assert(!isRematerializedRegister(RootIdx) &&
+ "cannot revive rematerialization");
Reg &Root = Regs[RootIdx];
if (!Root.Uses.empty()) {
@@ -223,7 +224,7 @@ void Rematerializer::transferUserInternal(unsigned FromRegIdx,
assert(getReg(FromRegIdx).Uses.at(MIRegion.at(&UserMI)).contains(&UserMI) &&
"not a user");
assert(FromRegIdx != ToRegIdx && "identical registers");
- assert(getParentOrSelf(FromRegIdx) == getParentOrSelf(ToRegIdx) &&
+ assert(getOriginOrSelf(FromRegIdx) == getOriginOrSelf(ToRegIdx) &&
"unrelated registers");
LLVM_DEBUG(rdbgs() << "User transfer from " << printID(FromRegIdx) << " to "
@@ -290,7 +291,7 @@ void Rematerializer::commitRematerializations() {
Rollbackable.clear();
}
-bool Rematerializer::isMOAvailableAtUses(const MachineOperand &MO,
+bool Rematerializer::isMOIdenticalAtUses(MachineOperand &MO,
ArrayRef<SlotIndex> Uses) const {
if (Uses.empty())
return true;
@@ -310,7 +311,7 @@ bool Rematerializer::isMOAvailableAtUses(const MachineOperand &MO,
unsigned Rematerializer::findRematInRegion(unsigned RegIdx, unsigned Region,
SlotIndex Before) const {
- auto It = Rematerializations.find(getParentOrSelf(RegIdx));
+ auto It = Rematerializations.find(getOriginOrSelf(RegIdx));
if (It == Rematerializations.end())
return NoReg;
const SmallDenseSet<unsigned, 4> &Remats = It->getSecond();
@@ -366,10 +367,10 @@ bool Rematerializer::deleteRegIfUnused(unsigned RootIdx, bool SupportRollback) {
} else {
deleteReg(RootIdx);
}
- if (isRematerialization(RootIdx)) {
+ if (isRematerializedRegister(RootIdx)) {
SmallDenseSet<unsigned, 4> &Remats =
- Rematerializations.at(getParentOf(RootIdx));
- assert(Remats.contains(RootIdx) && "broken link between remat and parent");
+ Rematerializations.at(getOriginOf(RootIdx));
+ assert(Remats.contains(RootIdx) && "broken link between remat and origin");
Remats.erase(RootIdx);
if (Remats.empty())
Rematerializations.erase(RootIdx);
@@ -430,7 +431,7 @@ bool Rematerializer::analyze() {
LLVM_DEBUG({
for (unsigned I = 0, E = getNumRegs(); I < E; ++I)
- dbgs() << printTree(I) << '\n';
+ dbgs() << printDepDAG(I) << '\n';
});
return !Regs.empty();
}
@@ -526,10 +527,9 @@ unsigned Rematerializer::getDefRegIdx(const MachineInstr &MI) const {
return UserRegIt->second;
}
-unsigned
-Rematerializer::createReg(unsigned RegIdx,
- MachineBasicBlock::iterator InsertPos,
- SmallVectorImpl<Reg::Dependency> &&Dependencies) {
+unsigned Rematerializer::rematerializeReg(
+ unsigned RegIdx, MachineBasicBlock::iterator InsertPos,
+ SmallVectorImpl<Reg::Dependency> &&Dependencies) {
unsigned UseRegion = MIRegion.at(&*InsertPos);
unsigned NewRegIdx = Regs.size();
@@ -539,13 +539,13 @@ Rematerializer::createReg(unsigned RegIdx,
NewReg.DefRegion = UseRegion;
NewReg.Dependencies = std::move(Dependencies);
- // Track rematerialization link between registers. Parents are always
+ // Track rematerialization link between registers. Origins are always
// registers that existed originally, and rematerializations are always
// attached to them.
- unsigned ParentIdx =
- isRematerialization(RegIdx) ? getParentOf(RegIdx) : RegIdx;
- Parents.push_back(ParentIdx);
- Rematerializations[ParentIdx].insert(NewRegIdx);
+ unsigned OriginIdx =
+ isRematerializedRegister(RegIdx) ? getOriginOf(RegIdx) : RegIdx;
+ Origins.push_back(OriginIdx);
+ Rematerializations[OriginIdx].insert(NewRegIdx);
// Use the TII to rematerialize the defining instruction with a new defined
// register.
@@ -637,7 +637,7 @@ void Rematerializer::Reg::eraseUser(MachineInstr *MI, unsigned Region) {
RUsers.erase(MI);
}
-Printable Rematerializer::printTree(unsigned RootIdx) const {
+Printable Rematerializer::printDepDAG(unsigned RootIdx) const {
return Printable([&, RootIdx](raw_ostream &OS) {
DenseMap<unsigned, unsigned> RegDepths;
std::function<void(unsigned, unsigned)> WalkTree =
>From 1b107e9957aab38a6819b40c2297471c61f1b048 Mon Sep 17 00:00:00 2001
From: Lucas Ramirez <lucas.rami at proton.me>
Date: Fri, 30 Jan 2026 18:08:25 +0000
Subject: [PATCH 4/6] Fix typo in method name
---
llvm/lib/CodeGen/Rematerializer.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/CodeGen/Rematerializer.cpp b/llvm/lib/CodeGen/Rematerializer.cpp
index 01c8a6f0c807b..37ccbfa3f0e1e 100644
--- a/llvm/lib/CodeGen/Rematerializer.cpp
+++ b/llvm/lib/CodeGen/Rematerializer.cpp
@@ -431,7 +431,7 @@ bool Rematerializer::analyze() {
LLVM_DEBUG({
for (unsigned I = 0, E = getNumRegs(); I < E; ++I)
- dbgs() << printDepDAG(I) << '\n';
+ dbgs() << printDependencyDAG(I) << '\n';
});
return !Regs.empty();
}
@@ -637,7 +637,7 @@ void Rematerializer::Reg::eraseUser(MachineInstr *MI, unsigned Region) {
RUsers.erase(MI);
}
-Printable Rematerializer::printDepDAG(unsigned RootIdx) const {
+Printable Rematerializer::printDependencyDAG(unsigned RootIdx) const {
return Printable([&, RootIdx](raw_ostream &OS) {
DenseMap<unsigned, unsigned> RegDepths;
std::function<void(unsigned, unsigned)> WalkTree =
>From cb972cc97089d00eecb3599d363e6b99d1a66897 Mon Sep 17 00:00:00 2001
From: Lucas Ramirez <lucas.rami at proton.me>
Date: Mon, 2 Feb 2026 18:14:24 +0000
Subject: [PATCH 5/6] More changes following feedback
---
llvm/include/llvm/CodeGen/Rematerializer.h | 51 ++---
llvm/lib/CodeGen/Rematerializer.cpp | 203 +++++++++---------
llvm/unittests/CodeGen/RematerializerTest.cpp | 150 ++++++-------
3 files changed, 195 insertions(+), 209 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/Rematerializer.h b/llvm/include/llvm/CodeGen/Rematerializer.h
index c54cb7814f742..45c99c88e155c 100644
--- a/llvm/include/llvm/CodeGen/Rematerializer.h
+++ b/llvm/include/llvm/CodeGen/Rematerializer.h
@@ -63,7 +63,8 @@ namespace llvm {
/// registers to regions where these registers are used, with the option of
/// re-using non-root registers or their previous rematerializations instead of
/// rematerializing them again. It also optionally supports rolling back
-/// previous rematerializations to restore the MIR state to what it was
+/// previous rematerializations (set during analysis phase, see \ref
+/// Rematerializer::analyze) to restore the MIR state to what it was
/// pre-rematerialization. When enabled, machine instructions defining
/// rematerializable registers that no longer have any uses following previous
/// rematerializations will not be deleted from the MIR; their opcode will
@@ -175,15 +176,16 @@ class Rematerializer {
/// Simply initializes some internal state, does not identify
/// rematerialization candidates.
Rematerializer(MachineFunction &MF,
- SmallVectorImpl<RegionBoundaries> &Regions,
- bool RegionsTopDown, LiveIntervals &LIS)
- : MF(MF), Regions(Regions), MRI(MF.getRegInfo()), LIS(LIS),
- TII(*MF.getSubtarget().getInstrInfo()), TRI(TII.getRegisterInfo()),
- RegionsTopDown(RegionsTopDown) {}
+ SmallVectorImpl<RegionBoundaries> &Regions, LiveIntervals &LIS)
+ : Regions(Regions), MRI(MF.getRegInfo()), LIS(LIS),
+ TII(*MF.getSubtarget().getInstrInfo()), TRI(TII.getRegisterInfo()) {}
/// Goes through the whole MF and identifies all rematerializable registers.
- /// Returns whether there is any rematerializable register in the MF.
- bool analyze();
+ /// When \p SupportRollback is set, rematerializations of original registers
+ /// can be rolled back and original registers are maintained in the IR even
+ /// when they no longer have any users. Returns whether there is any
+ /// rematerializable register in regions.
+ bool analyze(bool SupportRollback);
inline const Reg &getReg(unsigned RegIdx) const {
assert(RegIdx < Regs.size() && "out of bounds");
@@ -276,9 +278,7 @@ class Rematerializer {
/// Rematerializes register \p RootIdx just before its first user inside
/// region \p UseRegion, transfers all its users in the region to the new
/// register, and returns the latter's index. The root's dependency DAG is
- /// rematerialized or re-used according to \p DRI. If \p SupportRollback is
- /// true, rematerializations of registers that lose all their users as a
- /// consequence of the rematerializations can later be rolled back.
+ /// rematerialized or re-used according to \p DRI.
///
/// When the method returns, \p DRI contains additional mappings of all
/// transitive dependencies that had to be rematerialized to their
@@ -286,7 +286,6 @@ class Rematerializer {
/// Rematerializer::Reg should be considered invalidated by calls to this
/// method.
unsigned rematerializeToRegion(unsigned RootIdx, unsigned UseRegion,
- bool SupportRollback,
DependencyReuseInfo &DRI);
/// Rematerializes register \p RootIdx to position \p InsertPos and returns
@@ -326,20 +325,16 @@ class Rematerializer {
/// Transfers all users of register \p FromRegIdx in region \p UseRegion to \p
/// ToRegIdx, the latter of which must be a rematerialization of the former or
/// have the same origin register. Users in \p UseRegion must be reachable
- /// from \p ToRegIdx. If \p SupportRollback is true, rematerializations of
- /// registers that lose all their users as a consequence of the transfer can
- /// later be rolled back.
+ /// from \p ToRegIdx.
void transferRegionUsers(unsigned FromRegIdx, unsigned ToRegIdx,
- unsigned UseRegion, bool SupportRollback);
+ unsigned UseRegion);
/// Transfers user \p UserMI from register \p FromRegIdx to \p ToRegIdx,
/// the latter of which must be a rematerialization of the former or have the
/// same origin register. \p UserMI must be a direct user of \p FromRegIdx. \p
- /// UserMI must be reachable from \p ToRegIdx. If \p SupportRollback is true,
- /// rematerializations of registers that lose all their users as a consequence
- /// of the transfer can later be rolled back.
+ /// UserMI must be reachable from \p ToRegIdx.
void transferUser(unsigned FromRegIdx, unsigned ToRegIdx,
- MachineInstr &UserMI, bool SupportRollback);
+ MachineInstr &UserMI);
/// Recomputes all live intervals that have changed as a result of previous
/// rematerializations/rollbacks.
@@ -367,13 +362,11 @@ class Rematerializer {
Printable printUser(const MachineInstr *MI) const;
private:
- MachineFunction &MF;
SmallVectorImpl<RegionBoundaries> &Regions;
MachineRegisterInfo &MRI;
LiveIntervals &LIS;
const TargetInstrInfo &TII;
const TargetRegisterInfo &TRI;
- bool RegionsTopDown;
/// Rematerializable registers identified since the rematerializer's creation,
/// both dead and alive, originals and rematerializations. No register is ever
@@ -400,7 +393,7 @@ class Rematerializer {
/// in the MIR.
DenseMap<Register, unsigned> RegToIdx;
/// Maps all MIs to their parent region. Region terminators are considered
- /// part of the region they end.
+ /// part of the region they terminate.
DenseMap<MachineInstr *, unsigned> MIRegion;
/// Set of registers whose live-range may have changed during past
/// rematerializations/rollbacks.
@@ -409,9 +402,11 @@ class Rematerializer {
/// currently rollback-able. Values map register machine operand indices to
/// their original register.
DenseMap<unsigned, DenseMap<unsigned, Register>> Rollbackable;
+ /// Whether all rematerializations of registers identified during the last
+ /// analysis phase will be rollback-able.
+ bool SupportRollback = false;
- /// Collects all rematerializable registers inside region \p DefRegion.
- void collectRegs(unsigned DefRegion);
+ void addRegIfRematerializable(unsigned VirtRegIdx, BitVector &SeenRegs);
/// Determines whether \p MI is considered rematerializable. This further
/// restricts constraints imposed by the TII on rematerializable instructions,
@@ -439,10 +434,8 @@ class Rematerializer {
/// Deletes register \p RootIdx if it no longer has any user. If the register
/// is deleted, recursively deletes any of its transitive rematerializable
- /// dependencies that no longer have users as a result. When \p
- /// SupportRollback is true, allows to rollback rematerializations of the
- /// deleted register later on.
- bool deleteRegIfUnused(unsigned RootIdx, bool SupportRollback);
+ /// dependencies that no longer have users as a result.
+ bool deleteRegIfUnused(unsigned RootIdx);
/// Deletes rematerializable register \p RegIdx from the DAG and relevant
/// internal state.
diff --git a/llvm/lib/CodeGen/Rematerializer.cpp b/llvm/lib/CodeGen/Rematerializer.cpp
index 37ccbfa3f0e1e..ce439322e26e2 100644
--- a/llvm/lib/CodeGen/Rematerializer.cpp
+++ b/llvm/lib/CodeGen/Rematerializer.cpp
@@ -13,12 +13,10 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/Rematerializer.h"
-#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Register.h"
@@ -67,13 +65,12 @@ static Register isRegDependency(const MachineOperand &MO) {
unsigned Rematerializer::rematerializeToRegion(unsigned RootIdx,
unsigned UseRegion,
- bool SupportRollback,
DependencyReuseInfo &DRI) {
MachineInstr *FirstMI =
getReg(RootIdx).getRegionUseBounds(UseRegion, LIS).first;
unsigned NewRegIdx = rematerializeToPos(RootIdx, FirstMI, DRI);
- transferRegionUsers(RootIdx, NewRegIdx, UseRegion, SupportRollback);
+ transferRegionUsers(RootIdx, NewRegIdx, UseRegion);
return NewRegIdx;
}
@@ -116,23 +113,19 @@ void Rematerializer::rollbackRematsOf(unsigned RootIdx) {
LLVM_DEBUG({
rdbgs() << "Rolling back rematerializations of " << printID(RootIdx)
<< '\n';
- ++CallDepth;
});
reviveRegIfDead(RootIdx);
// All of the rematerialization's users must use the revived register.
for (unsigned RematRegIdx : Remats->getSecond()) {
- for (const auto &[UseRegion, RegionUsers] : Regs[RematRegIdx].Uses) {
- transferRegionUsers(RematRegIdx, RootIdx, UseRegion,
- /*SupportRollback=*/false);
- }
+ for (const auto &[UseRegion, RegionUsers] : Regs[RematRegIdx].Uses)
+ transferRegionUsers(RematRegIdx, RootIdx, UseRegion);
}
Rematerializations.erase(RootIdx);
LLVM_DEBUG({
rdbgs() << "** Rolled back rematerializations of " << printID(RootIdx)
<< '\n';
- --CallDepth;
});
}
@@ -141,10 +134,8 @@ void Rematerializer::rollback(unsigned RematIdx) {
"cannot rollback dead register");
const unsigned OriginRegIdx = getOriginOf(RematIdx);
reviveRegIfDead(OriginRegIdx);
- for (const auto &[UseRegion, RegionUsers] : Regs[RematIdx].Uses) {
- transferRegionUsers(RematIdx, OriginRegIdx, UseRegion,
- /*SupportRollback=*/false);
- }
+ for (const auto &[UseRegion, RegionUsers] : Regs[RematIdx].Uses)
+ transferRegionUsers(RematIdx, OriginRegIdx, UseRegion);
}
void Rematerializer::reviveRegIfDead(unsigned RootIdx) {
@@ -164,7 +155,7 @@ void Rematerializer::reviveRegIfDead(unsigned RootIdx) {
assert(Rematerializations.contains(RootIdx) && "no remats");
LLVM_DEBUG({
- rdbgs() << "Partially rolling back " << printID(RootIdx) << '\n';
+ rdbgs() << "Reviving " << printID(RootIdx) << '\n';
++CallDepth;
});
@@ -185,7 +176,7 @@ void Rematerializer::reviveRegIfDead(unsigned RootIdx) {
LISUpdates.insert(RootIdx);
LLVM_DEBUG({
- rdbgs() << "** Partially rolled back " << printID(RootIdx) << " @ ";
+ rdbgs() << "** Revived " << printID(RootIdx) << " @ ";
LIS.getInstructionIndex(*Root.DefMI).print(dbgs());
dbgs() << '\n';
--CallDepth;
@@ -193,17 +184,16 @@ void Rematerializer::reviveRegIfDead(unsigned RootIdx) {
}
void Rematerializer::transferUser(unsigned FromRegIdx, unsigned ToRegIdx,
- MachineInstr &UserMI, bool SupportRollback) {
+ MachineInstr &UserMI) {
transferUserInternal(FromRegIdx, ToRegIdx, UserMI);
unsigned UserRegion = MIRegion[&UserMI];
Regs[FromRegIdx].eraseUser(&UserMI, UserRegion);
Regs[ToRegIdx].addUser(&UserMI, UserRegion);
- deleteRegIfUnused(FromRegIdx, SupportRollback);
+ deleteRegIfUnused(FromRegIdx);
}
void Rematerializer::transferRegionUsers(unsigned FromRegIdx, unsigned ToRegIdx,
- unsigned UseRegion,
- bool SupportRollback) {
+ unsigned UseRegion) {
auto &FromRegUsers = Regs[FromRegIdx].Uses;
auto UsesIt = FromRegUsers.find(UseRegion);
if (UsesIt == FromRegUsers.end())
@@ -214,7 +204,7 @@ void Rematerializer::transferRegionUsers(unsigned FromRegIdx, unsigned ToRegIdx,
transferUserInternal(FromRegIdx, ToRegIdx, *UserMI);
Regs[ToRegIdx].addUsers(RegionUsers, UseRegion);
FromRegUsers.erase(UseRegion);
- deleteRegIfUnused(FromRegIdx, SupportRollback);
+ deleteRegIfUnused(FromRegIdx);
}
void Rematerializer::transferUserInternal(unsigned FromRegIdx,
@@ -333,7 +323,7 @@ unsigned Rematerializer::findRematInRegion(unsigned RegIdx, unsigned Region,
return BestRegIdx;
}
-bool Rematerializer::deleteRegIfUnused(unsigned RootIdx, bool SupportRollback) {
+bool Rematerializer::deleteRegIfUnused(unsigned RootIdx) {
Reg &Root = Regs[RootIdx];
if (!Root.Uses.empty())
return false;
@@ -346,15 +336,16 @@ bool Rematerializer::deleteRegIfUnused(unsigned RootIdx, bool SupportRollback) {
for (const Reg::Dependency &Dep : Root.Dependencies) {
LLVM_DEBUG(rdbgs() << "Deleting user from " << printID(Dep.RegIdx) << "\n");
Regs[Dep.RegIdx].eraseUser(Root.DefMI, Root.DefRegion);
- deleteRegIfUnused(Dep.RegIdx, SupportRollback);
+ deleteRegIfUnused(Dep.RegIdx);
}
LIS.removeInterval(DefReg);
LISUpdates.erase(RootIdx);
- if (SupportRollback) {
- // Replace all read registers with the null one to prevent issues in live
+ if (SupportRollback && !isRematerializedRegister(RootIdx)) {
+ // Replace all read registers with the null one to prevent them from showing
+ // up in use-lists, which is disallowed for debug instructions in live
// interval calculations. Store mappings between operand indices and
- // original registers for potential rolqlback.
+ // original registers for potential rollback.
DenseMap<unsigned, Register> &RegMap =
Rollbackable.try_emplace(RootIdx).first->getSecond();
for (auto [Idx, MO] : enumerate(Root.DefMI->operands())) {
@@ -393,41 +384,44 @@ void Rematerializer::deleteReg(unsigned RegIdx) {
DeleteReg.DefMI = nullptr;
}
-bool Rematerializer::analyze() {
- MIRegion.clear();
+bool Rematerializer::analyze(bool SupportRollback) {
Regs.clear();
+ UnrematableOprds.clear();
+ Origins.clear();
+ Rematerializations.clear();
+ MIRegion.clear();
RegToIdx.clear();
LISUpdates.clear();
Rollbackable.clear();
+ this->SupportRollback = SupportRollback;
if (Regions.empty())
return false;
- // Maps each basic block number to regions that are part of the BB.
- DenseMap<unsigned, SmallVector<unsigned, 4>> RegionsPerBlock;
-
+ // Initialize MI to containing region mapping.
const unsigned NumRegions = Regions.size();
for (unsigned I = 0; I < NumRegions; ++I) {
RegionBoundaries Region = Regions[I];
- for (auto MI = Region.first; MI != Region.second; ++MI)
+ assert(Region.first != Region.second && "empty cannot be region");
+ for (auto MI = Region.first; MI != Region.second; ++MI) {
+ assert(!MIRegion.contains(&*MI) && "regions should not intersect");
MIRegion.insert({&*MI, I});
- MachineBasicBlock *MBB = Region.first->getParent();
- if (Region.second != MBB->end())
- MIRegion.insert({&*Region.second, I});
- RegionsPerBlock[MBB->getNumber()].push_back(I);
+ }
+
+ // A terminator instruction is considered part of the region it terminates.
+ if (Region.second != Region.first->getParent()->end()) {
+ MachineInstr *RegionTerm = &*Region.second;
+ assert(!MIRegion.contains(RegionTerm) && "regions should not intersect");
+ MIRegion.insert({RegionTerm, I});
+ }
}
- // Visit regions in dominator tree pre-order to ensure that regions defining
- // registers come before regions using them.
- MachineDominatorTree MDT(MF);
- for (MachineDomTreeNode *MBB : depth_first(&MDT)) {
- auto MBBRegions = RegionsPerBlock.find(MBB->getBlock()->getNumber());
- if (MBBRegions == RegionsPerBlock.end())
- continue;
- auto MBBRegionsIt = RegionsTopDown ? MBBRegions->getSecond()
- : reverse(MBBRegions->getSecond());
- for (unsigned I : MBBRegionsIt)
- collectRegs(I);
+ const unsigned NumVirtRegs = MRI.getNumVirtRegs();
+ BitVector SeenRegs(NumVirtRegs);
+ for (unsigned I = 0, E = NumVirtRegs; I != E; ++I) {
+ if (!SeenRegs[I])
+ addRegIfRematerializable(I, SeenRegs);
}
+ assert(Regs.size() == UnrematableOprds.size());
LLVM_DEBUG({
for (unsigned I = 0, E = getNumRegs(); I < E; ++I)
@@ -436,71 +430,71 @@ bool Rematerializer::analyze() {
return !Regs.empty();
}
-void Rematerializer::collectRegs(unsigned DefRegion) {
- // Collect partially rematerializable registers in instruction order within
- // each region. This guarantees that, within a single region, partially
- // rematerializable registers used in instructions defining other partially
- // rematerializable registers are visited first. This is important to
- // guarantee that all of a register's dependencies are visited before the
- // register itself.
- RegionBoundaries Bounds = Regions[DefRegion];
- for (auto MI = Bounds.first; MI != Bounds.second; ++MI) {
- MachineInstr &DefMI = *MI;
- if (!isMIRematerializable(DefMI))
- continue;
+void Rematerializer::addRegIfRematerializable(unsigned VirtRegIdx,
+ BitVector &SeenRegs) {
+ assert(!SeenRegs[VirtRegIdx] && "register already seen");
+ Register DefReg = Register::index2VirtReg(VirtRegIdx);
+ SeenRegs.set(VirtRegIdx);
- Reg &CurrentReg = Regs.emplace_back();
- CurrentReg.DefMI = &DefMI;
- CurrentReg.DefRegion = DefRegion;
- Register DefReg = CurrentReg.getDefReg();
- unsigned SubIdx = DefMI.getOperand(0).getSubReg();
- CurrentReg.Mask = SubIdx ? TRI.getSubRegIndexLaneMask(SubIdx)
- : MRI.getMaxLaneMaskForVReg(DefReg);
-
- // Collect the candidate's direct users, both rematerializable and
- // unrematerializable.
- for (MachineInstr &UseMI : MRI.use_nodbg_instructions(DefReg)) {
- auto UseRegion = MIRegion.find(&UseMI);
- if (UseRegion == MIRegion.end()) {
- // Only lone MI terminators can trigger this condition. They are not
- // part of any region so we cannot rematerialize next to them. Just
- // consider this register unrematerializable.
- CurrentReg.Uses.clear();
- break;
- }
- CurrentReg.addUser(&UseMI, UseRegion->second);
- }
- if (CurrentReg.Uses.empty()) {
- Regs.pop_back();
- continue;
- }
+ MachineOperand *MO = MRI.getOneDef(DefReg);
+ if (!MO)
+ return;
+ MachineInstr &DefMI = *MO->getParent();
+ if (!isMIRematerializable(DefMI))
+ return;
+ auto DefRegion = MIRegion.find(&DefMI);
+ if (DefRegion == MIRegion.end())
+ return;
- // Collect the candidate's dependencies. If the same register is used
- // multiple times we just need to store it once.
- SmallDenseSet<Register, 4> AllDepRegs;
- SmallVector<unsigned, 2> &Unrematable = UnrematableOprds.emplace_back();
- for (const auto &[MOIdx, MO] : enumerate(CurrentReg.DefMI->operands())) {
- Register DepReg = isRegDependency(MO);
- if (!DepReg || !AllDepRegs.insert(DepReg).second)
- continue;
- if (auto DepIt = RegToIdx.find(DepReg); DepIt != RegToIdx.end()) {
- Reg::Dependency Dep(MOIdx, DepIt->second);
- CurrentReg.Dependencies.push_back(Dep);
- } else
- Unrematable.push_back(MOIdx);
- }
+ Reg RematReg;
+ RematReg.DefMI = &DefMI;
+ RematReg.DefRegion = DefRegion->second;
+ unsigned SubIdx = DefMI.getOperand(0).getSubReg();
+ RematReg.Mask = SubIdx ? TRI.getSubRegIndexLaneMask(SubIdx)
+ : MRI.getMaxLaneMaskForVReg(DefReg);
+
+ // Collect the candidate's direct users, both rematerializable and
+ // unrematerializable. MIs outside provided regions cannot be tracked so the
+ // registers they use are not safely rematerializable.
+ for (MachineInstr &UseMI : MRI.use_nodbg_instructions(DefReg)) {
+ if (auto UseRegion = MIRegion.find(&UseMI); UseRegion != MIRegion.end())
+ RematReg.addUser(&UseMI, UseRegion->second);
+ else
+ return;
+ }
+ if (RematReg.Uses.empty())
+ return;
- // The register is rematerializable.
- RegToIdx.insert({DefReg, Regs.size() - 1});
+ // Collect the candidate's dependencies. If the same register is used
+ // multiple times we just need to consider it once.
+ SmallDenseSet<Register, 4> AllDepRegs;
+ SmallVector<unsigned, 2> UnrematDeps;
+ for (const auto &[MOIdx, MO] : enumerate(RematReg.DefMI->operands())) {
+ Register DepReg = isRegDependency(MO);
+ if (!DepReg || !AllDepRegs.insert(DepReg).second)
+ continue;
+ unsigned DepRegIdx = DepReg.virtRegIndex();
+ if (!SeenRegs[DepRegIdx])
+ addRegIfRematerializable(DepRegIdx, SeenRegs);
+ if (auto DepIt = RegToIdx.find(DepReg); DepIt != RegToIdx.end())
+ RematReg.Dependencies.push_back(Reg::Dependency(MOIdx, DepIt->second));
+ else
+ UnrematDeps.push_back(MOIdx);
}
- assert(Regs.size() == UnrematableOprds.size());
+ // The register is rematerializable.
+ RegToIdx.insert({DefReg, Regs.size()});
+ Regs.push_back(RematReg);
+ UnrematableOprds.push_back(UnrematDeps);
}
bool Rematerializer::isMIRematerializable(const MachineInstr &MI) const {
if (!TII.isReMaterializable(MI))
return false;
+ assert(MI.getOperand(0).getReg().isVirtual() && "should be virtual");
+ assert(MRI.hasOneDef(MI.getOperand(0).getReg()) && "should have single def");
+
for (const MachineOperand &MO : MI.all_uses()) {
// We can't remat physreg uses, unless it is a constant or an ignorable
// use (e.g. implicit exec use on VALU instructions)
@@ -511,9 +505,7 @@ bool Rematerializer::isMIRematerializable(const MachineInstr &MI) const {
}
}
- // We only support rematerializing virtual registers with one definition.
- Register DefReg = MI.getOperand(0).getReg();
- return DefReg.isVirtual() && MRI.hasOneDef(DefReg);
+ return true;
}
unsigned Rematerializer::getDefRegIdx(const MachineInstr &MI) const {
@@ -658,8 +650,7 @@ Printable Rematerializer::printDependencyDAG(unsigned RootIdx) const {
OS << printID(RootIdx) << " has " << Regs.size() - 1 << " dependencies\n";
for (const auto &[RegIdx, Depth] : Regs) {
- std::string Shift(2 * Depth, ' ');
- OS << Shift << (Depth ? '|' : '*') << ' '
+ OS << indent(Depth, 2) << (Depth ? '|' : '*') << ' '
<< printRematReg(RegIdx, /*SkipRegions=*/Depth) << '\n';
}
OS << printRegUsers(RootIdx);
diff --git a/llvm/unittests/CodeGen/RematerializerTest.cpp b/llvm/unittests/CodeGen/RematerializerTest.cpp
index 0f5789897e083..af16774f7b1d0 100644
--- a/llvm/unittests/CodeGen/RematerializerTest.cpp
+++ b/llvm/unittests/CodeGen/RematerializerTest.cpp
@@ -95,7 +95,8 @@ class RematerializerTest : public testing::Test {
return true;
}
- Rematerializer &getRematerializer(StringRef MIR, StringRef FunName) {
+ Rematerializer &getRematerializer(StringRef MIR, StringRef FunName,
+ bool SupportRollback) {
MachineFunction &MF =
FAM.getResult<MachineFunctionAnalysis>(*M->getFunction(FunName))
.getMF();
@@ -105,23 +106,33 @@ class RematerializerTest : public testing::Test {
/// Each MBB is its own region. This wouldn't be how e.g., the scheduler
/// would do that but here we only want to test the rematerializer's API so
/// it is good enough.
- for (auto MBB = MF.begin(), MBBEnd = MF.end(); MBB != MBBEnd; ++MBB)
- Regions->push_back({MBB->begin(), MBB->end()});
- Remater = std::make_unique<Rematerializer>(MF, *Regions,
- /*RegionsTopDown=*/false, LIS);
- Remater->analyze();
+ for (MachineBasicBlock &MBB : MF)
+ Regions->push_back({MBB.begin(), MBB.end()});
+ Remater = std::make_unique<Rematerializer>(MF, *Regions, LIS);
+ Remater->analyze(SupportRollback);
return *Remater;
}
+
+ /// Returns the number of users of register \p RegIdx.
+ unsigned getNumUsers(unsigned RegIdx) {
+ unsigned NumUsers = 0;
+ for (const auto &[_, RegionUses] : Remater->getReg(RegIdx).Uses)
+ NumUsers += RegionUses.size();
+ return NumUsers;
+ }
+
+ /// Returns the size of region \p RegionIdx.
+ unsigned getNumRegions(unsigned RegionIdx) {
+ const Rematerializer::RegionBoundaries &Region = (*Regions)[RegionIdx];
+ return std::distance(Region.first, Region.second);
+ }
};
using MBBRegionsVector = SmallVector<SchedRegion, 16>;
/// Asserts that region RegionIdx contains RegionSize instructions.
#define ASSERT_REGION_SIZE(RegionIdx, RegionSize) \
- { \
- const auto &Region = (*Regions)[RegionIdx]; \
- ASSERT_EQ(std::distance(Region.first, Region.second), RegionSize); \
- }
+ ASSERT_EQ(getNumRegions(RegionIdx), RegionSize)
/// Asserts that regions have sizes RegionSizes, which must be an iterable
/// object with the same number of elements as the number of regions.
@@ -132,26 +143,21 @@ using MBBRegionsVector = SmallVector<SchedRegion, 16>;
ASSERT_REGION_SIZE(RegionIdx, Size); \
}
-/// Asserts that register RegIdx in the rematerializer has a total of N users.
-#define ASSERT_NUM_USERS(RegIdx, N) \
- { \
- unsigned NumUsers = 0; \
- for (const auto &[_, RegionUses] : Remater.getReg(RegIdx).Uses) \
- NumUsers += RegionUses.size(); \
- ASSERT_EQ(NumUsers, static_cast<unsigned>(N)); \
- }
+/// Expects that register RegIdx in the rematerializer has a total of N users.
+#define EXPECT_NUM_USERS(RegIdx, N) \
+ EXPECT_EQ(getNumUsers(RegIdx), static_cast<unsigned>(N))
-/// Asserts that register RegIdx in the remterializer hsa no users.
-#define ASSERT_NO_USERS(RegIdx) ASSERT_NUM_USERS(RegIdx, 0)
+/// Expects that register RegIdx in the rematerializer has no users.
+#define EXPECT_NO_USERS(RegIdx) EXPECT_NUM_USERS(RegIdx, 0)
-/// Asserts that rematerialized register RegIdx has parent ParentIdx, is defined
+/// Expects that rematerialized register RegIdx has origin OriginIdx, is defined
/// in region DefRegionIdx, and has a total of NumUsers users.
-#define ASSERT_REMAT(RegIdx, ParentIdx, DefRegionIdx, NumUsers) \
+#define EXPECT_REMAT(RegIdx, OriginIdx, DefRegionIdx, NumUsers) \
{ \
const Rematerializer::Reg &RematReg = Remater.getReg(RegIdx); \
- ASSERT_EQ(Remater.getParentOf(RegIdx), ParentIdx); \
- ASSERT_EQ(RematReg.DefRegion, DefRegionIdx); \
- ASSERT_NUM_USERS(RegIdx, NumUsers); \
+ EXPECT_EQ(Remater.getOriginOf(RegIdx), OriginIdx); \
+ EXPECT_EQ(RematReg.DefRegion, DefRegionIdx); \
+ EXPECT_NUM_USERS(RegIdx, NumUsers); \
}
/// Rematerializes a tree of registers to a single user in different ways using
@@ -178,7 +184,8 @@ body: |
...
)";
ASSERT_TRUE(parseMIR(MIR));
- Rematerializer &Remater = getRematerializer(MIR, "TreeRematRollback");
+ Rematerializer &Remater =
+ getRematerializer(MIR, "TreeRematRollback", /*SupportRollback=*/true);
Rematerializer::DependencyReuseInfo DRI;
// MBB/Region indices.
@@ -194,17 +201,16 @@ body: |
// Rematerialize Add23 with all transitive dependencies.
{
- Remater.rematerializeToRegion(/*RootIdx=*/Add23, /*UseRegion=*/MBB1,
- /*SupportRollback=*/true, DRI);
+ Remater.rematerializeToRegion(/*RootIdx=*/Add23, /*UseRegion=*/MBB1, DRI);
Remater.updateLiveIntervals();
// None of the original registers have any users, but they still are in the
// MIR because we enabled rollback support.
- ASSERT_NO_USERS(Cst0);
- ASSERT_NO_USERS(Cst1);
- ASSERT_NO_USERS(Add01);
- ASSERT_NO_USERS(Cst3);
- ASSERT_NO_USERS(Add23);
+ EXPECT_NO_USERS(Cst0);
+ EXPECT_NO_USERS(Cst1);
+ EXPECT_NO_USERS(Add01);
+ EXPECT_NO_USERS(Cst3);
+ EXPECT_NO_USERS(Add23);
// Copies of all MIs were inserted into the second MBB.
RegionSizes[MBB1] += 5;
@@ -221,17 +227,16 @@ body: |
// Rematerialize Add23 only with its direct dependencies, reuse the rest.
{
DRI.clear().reuse(Cst0).reuse(Cst1);
- Remater.rematerializeToRegion(/*RootIdx=*/Add23, /*UseRegion=*/MBB1,
- /*SupportRollback=*/true, DRI);
+ Remater.rematerializeToRegion(/*RootIdx=*/Add23, /*UseRegion=*/MBB1, DRI);
Remater.updateLiveIntervals();
// Re-used registers have rematerializations as their single user (original
// users are dead). Rematerialized registers have no users.
- ASSERT_NUM_USERS(Cst0, 1);
- ASSERT_NUM_USERS(Cst1, 1);
- ASSERT_NO_USERS(Add01);
- ASSERT_NO_USERS(Cst3);
- ASSERT_NO_USERS(Add23);
+ EXPECT_NUM_USERS(Cst0, 1);
+ EXPECT_NUM_USERS(Cst1, 1);
+ EXPECT_NO_USERS(Add01);
+ EXPECT_NO_USERS(Cst3);
+ EXPECT_NO_USERS(Add23);
// Only immediate dependencies are copied to the second MBB.
RegionSizes[MBB1] += 3;
@@ -255,33 +260,33 @@ body: |
Remater.rematerializeToPos(/*RootIdx=*/Add01, NopMI, DRI);
// This adds an additional user to the used constants, and does not change
// existing users for the original register.
- ASSERT_NO_USERS(RematAdd01);
- ASSERT_NUM_USERS(Add01, 1);
- ASSERT_NUM_USERS(Cst0, 2);
- ASSERT_NUM_USERS(Cst1, 2);
+ EXPECT_NO_USERS(RematAdd01);
+ EXPECT_NUM_USERS(Add01, 1);
+ EXPECT_NUM_USERS(Cst0, 2);
+ EXPECT_NUM_USERS(Cst1, 2);
DRI.clear();
const unsigned RematCst3 =
Remater.rematerializeToPos(/*RootIdx=*/Cst3, NopMI, DRI);
// This does not change existing users for the original register.
- ASSERT_NO_USERS(RematCst3);
- ASSERT_NUM_USERS(Cst3, 1);
+ EXPECT_NO_USERS(RematCst3);
+ EXPECT_NUM_USERS(Cst3, 1);
DRI.clear().useRemat(Add01, RematAdd01).useRemat(Cst3, RematCst3);
const unsigned RematAdd23 =
Remater.rematerializeToPos(/*RootIdx=*/Add23, NopMI, DRI);
// This adds a user to used rematerializations, and does not change existing
// users for the original register.
- ASSERT_NO_USERS(RematAdd23);
- ASSERT_NUM_USERS(Add23, 1);
- ASSERT_NUM_USERS(RematAdd01, 1);
- ASSERT_NUM_USERS(RematCst3, 1);
+ EXPECT_NO_USERS(RematAdd23);
+ EXPECT_NUM_USERS(Add23, 1);
+ EXPECT_NUM_USERS(RematAdd01, 1);
+ EXPECT_NUM_USERS(RematCst3, 1);
// Finally transfer the NOP user from the original to the rematerialized
// register.
- Remater.transferUser(Add23, RematAdd23, *NopMI, /*SupportRollback=*/true);
- ASSERT_NO_USERS(Add23);
- ASSERT_NUM_USERS(RematAdd23, 1);
+ Remater.transferUser(Add23, RematAdd23, *NopMI);
+ EXPECT_NO_USERS(Add23);
+ EXPECT_NUM_USERS(RematAdd23, 1);
RegionSizes[MBB1] += 3;
ASSERT_REGION_SIZES(RegionSizes);
@@ -325,7 +330,8 @@ body: |
...
)";
ASSERT_TRUE(parseMIR(MIR));
- Rematerializer &Remater = getRematerializer(MIR, "MultiRegionsRemat");
+ Rematerializer &Remater =
+ getRematerializer(MIR, "MultiRegionsRemat", /*SupportRollback=*/false);
Rematerializer::DependencyReuseInfo DRI;
// MBB/Region indices.
@@ -339,31 +345,28 @@ body: |
// Rematerialization to MBB1.
const unsigned RematBB1 =
- Remater.rematerializeToRegion(/*RootIdx=*/Cst0, /*UseRegion=*/MBB1,
- /*SupportRollback=*/false, DRI);
+ Remater.rematerializeToRegion(/*RootIdx=*/Cst0, /*UseRegion=*/MBB1, DRI);
++RegionSizes[MBB1];
ASSERT_REGION_SIZES(RegionSizes);
- ASSERT_REMAT(/*RegIdx=*/RematBB1, /*ParentIdx=*/Cst0, /*DefRegionIdx=*/MBB1,
+ EXPECT_REMAT(/*RegIdx=*/RematBB1, /*OriginIdx=*/Cst0, /*DefRegionIdx=*/MBB1,
/*NumUsers=*/1);
// Rematerialization to MBB2.
const unsigned RematBB2 =
- Remater.rematerializeToRegion(/*RootIdx=*/Cst0, /*UseRegion=*/MBB2,
- /*SupportRollback=*/false, DRI);
+ Remater.rematerializeToRegion(/*RootIdx=*/Cst0, /*UseRegion=*/MBB2, DRI);
++RegionSizes[MBB2];
ASSERT_REGION_SIZES(RegionSizes);
- ASSERT_REMAT(/*RegIdx=*/RematBB2, /*ParentIdx=*/Cst0, /*DefRegionIdx=*/MBB2,
+ EXPECT_REMAT(/*RegIdx=*/RematBB2, /*OriginIdx=*/Cst0, /*DefRegionIdx=*/MBB2,
/*NumUsers=*/2);
// Rematerialization to MBB3. Rematerializing to the last original user
// deletes the original register.
const unsigned RematBB3 =
- Remater.rematerializeToRegion(/*RootIdx=*/Cst0, /*UseRegion=*/MBB3,
- /*SupportRollback=*/false, DRI);
+ Remater.rematerializeToRegion(/*RootIdx=*/Cst0, /*UseRegion=*/MBB3, DRI);
--RegionSizes[MBB0];
++RegionSizes[MBB3];
ASSERT_REGION_SIZES(RegionSizes);
- ASSERT_REMAT(/*RegIdx=*/RematBB3, /*ParentIdx=*/Cst0, /*DefRegionIdx=*/MBB3,
+ EXPECT_REMAT(/*RegIdx=*/RematBB3, /*OriginIdx=*/Cst0, /*DefRegionIdx=*/MBB3,
/*NumUsers=*/1);
Remater.updateLiveIntervals();
@@ -371,7 +374,7 @@ body: |
/// Rematerializes a tree of register with some unrematerializable operands to a
/// final destination in two steps, creating rematerializations of
-/// rematerializations in the process. Make sure that parents of
+/// rematerializations in the process. Make sure that origins of
/// rematerializations are always original registers.
TEST_F(RematerializerTest, MultiStep) {
StringRef MIR = R"(
@@ -395,7 +398,8 @@ body: |
...
)";
ASSERT_TRUE(parseMIR(MIR));
- Rematerializer &Remater = getRematerializer(MIR, "MultiStep");
+ Rematerializer &Remater =
+ getRematerializer(MIR, "MultiStep", /*SupportRollback=*/false);
Rematerializer::DependencyReuseInfo DRI;
// MBB/Region indices.
@@ -413,15 +417,14 @@ body: |
// implicit def that is non-ignorable so it cannot be rematerialized. The
// constant 0 remains in the first block because it has a user there, but the
// add is deleted.
- Remater.rematerializeToRegion(/*RootIdx=*/Add01, /*UseRegion=*/MBB1,
- /*SupportRollback=*/false, DRI);
+ Remater.rematerializeToRegion(/*RootIdx=*/Add01, /*UseRegion=*/MBB1, DRI);
const unsigned RematCst0 = NumRegs++, RematAdd01 = NumRegs++;
RegionSizes[MBB0] -= 1;
RegionSizes[MBB1] += 2;
ASSERT_REGION_SIZES(RegionSizes);
- ASSERT_REMAT(/*RegIdx=*/RematCst0, /*ParentIdx=*/Cst0, /*DefRegionIdx=*/MBB1,
+ EXPECT_REMAT(/*RegIdx=*/RematCst0, /*OriginIdx=*/Cst0, /*DefRegionIdx=*/MBB1,
/*NumUsers=*/1);
- ASSERT_REMAT(/*RegIdx=*/RematAdd01, /*ParentIdx=*/Add01,
+ EXPECT_REMAT(/*RegIdx=*/RematAdd01, /*OriginIdx=*/Add01,
/*DefRegionIdx=*/MBB1,
/*NumUsers=*/1);
@@ -431,18 +434,17 @@ body: |
// Rematerialize Add22 from the second to the third block, which will
// also indirectly rematerialize RematAdd01; make sure the latter's
- // rematerializations's parent is the original register, not RematAdd01.
+ // rematerialization's origin is the original register, not RematAdd01.
DRI.reuse(RematCst0);
- Remater.rematerializeToRegion(/*RootIdx=*/Add22, /*UseRegion=*/MBB2,
- /*SupportRollback=*/false, DRI);
+ Remater.rematerializeToRegion(/*RootIdx=*/Add22, /*UseRegion=*/MBB2, DRI);
const unsigned RematRematAdd01 = NumRegs++, RematAdd22 = NumRegs++;
RegionSizes[MBB1] -= 2;
RegionSizes[MBB2] += 2;
ASSERT_REGION_SIZES(RegionSizes);
- ASSERT_REMAT(/*RegIdx=*/RematRematAdd01, /*ParentIdx=*/Add01,
+ EXPECT_REMAT(/*RegIdx=*/RematRematAdd01, /*OriginIdx=*/Add01,
/*DefRegionIdx=*/MBB2,
/*NumUsers=*/1);
- ASSERT_REMAT(/*RegIdx=*/RematAdd22, /*ParentIdx=*/Add22,
+ EXPECT_REMAT(/*RegIdx=*/RematAdd22, /*OriginIdx=*/Add22,
/*DefRegionIdx=*/MBB2,
/*NumUsers=*/1);
>From 0c798b2f9b4f2806ac792436c04ca12ff704e127 Mon Sep 17 00:00:00 2001
From: Lucas Ramirez <lucas.rami at proton.me>
Date: Thu, 19 Feb 2026 13:22:32 +0000
Subject: [PATCH 6/6] Address review feedback
---
llvm/include/llvm/CodeGen/Rematerializer.h | 161 +++----
llvm/lib/CodeGen/Rematerializer.cpp | 410 ++++++++++--------
llvm/unittests/CodeGen/RematerializerTest.cpp | 238 +++++++---
3 files changed, 508 insertions(+), 301 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/Rematerializer.h b/llvm/include/llvm/CodeGen/Rematerializer.h
index 45c99c88e155c..44b50b1e597c7 100644
--- a/llvm/include/llvm/CodeGen/Rematerializer.h
+++ b/llvm/include/llvm/CodeGen/Rematerializer.h
@@ -11,11 +11,15 @@
//
//===----------------------------------------------------------------------===//
+#ifndef LLVM_CODEGEN_REMATERIALIZER_H
+#define LLVM_CODEGEN_REMATERIALIZER_H
+
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include <iterator>
@@ -30,15 +34,16 @@ namespace llvm {
/// 2. The single defining instruction is deemed rematerializable by the TII and
/// doesn't have any physical register use that is both non-constant and
/// non-ignorable.
-/// 3. The register has at least one non-debug use that is inside a region or a
-/// region terminator terminator.
+/// 3. The register has at least one non-debug use that is inside or at a region
+/// boundary (see below for what we consider to be a region).
///
/// Rematerializable registers (represented by \ref Rematerializer::Reg) form a
/// DAG of their own, with every register having incoming edges from all
/// rematerializable registers which are read by the instruction defining it. It
/// is possible to rematerialize registers with unrematerializable dependencies;
/// however the latter are not considered part of this DAG since their
-/// position/identity never change and therefore the same kind of tracking.
+/// position/identity never change and therefore do not require the same level
+/// of tracking.
///
/// Each register has a "dependency DAG" which is defined as the subset of nodes
/// in the overall DAG that have at least one path to the register, which is
@@ -55,9 +60,18 @@ namespace llvm {
/// which perform actual rematerializations should however be assumed to
/// invalidate addresses to \ref Rematerializer::Reg objects.
///
-/// The API also uses dense unsigned integers starting at 0 to reference
-/// regions. These map directly to the indices of the corresponding regions in
-/// the \p Rematerializer::Regions vector pased during construction.
+/// The rematerializer tracks def/use points of registers based on regions.
+/// These are similar to the regions the machine scheduler works on. A region is
+/// simply a pair of MBB iterators encoding a range of machine instructions. The
+/// first iterator (beginning of the region) is inclusive whereas the second
+/// iterator (end of the region) is exclusive and can either point to a MBB's
+/// end sentinel or an actual MI (not necessarily a terminator). Regions must be
+/// non-empty, cannot overlap, and cannot contain terminators. However, they do
+/// not have to cover the whole function.
+///
+/// The API uses dense unsigned integers starting at 0 to reference regions.
+/// These map directly to the indices of the corresponding regions in the region
+/// vector passed during construction.
///
/// The rematerializer supports rematerializing arbitrary complex DAGs of
/// registers to regions where these registers are used, with the option of
@@ -68,7 +82,7 @@ namespace llvm {
/// pre-rematerialization. When enabled, machine instructions defining
/// rematerializable registers that no longer have any uses following previous
/// rematerializations will not be deleted from the MIR; their opcode will
-/// instead be set to a debug value and their read register operands set to the
+/// instead be set to a DEBUG_VALUE and their read register operands set to the
/// null register. This maintains their position in the MIR and keeps the
/// original register alive for potential rollback while allowing other
/// passes/analyzes (e.g., machine scheduler, live-interval analysis) to ignore
@@ -92,6 +106,9 @@ namespace llvm {
/// registers have rematerializations.
class Rematerializer {
public:
+ /// Index type for rematerializable registers.
+ using RegisterIdx = unsigned;
+
/// A rematerializable register defined by a single machine instruction.
///
/// A rematerializable register has a set of dependencies, which correspond
@@ -126,9 +143,9 @@ class Rematerializer {
/// The register's machine operand index in \p DefMI.
unsigned MOIdx;
/// The corresponding register's index in the rematerializer.
- unsigned RegIdx;
+ RegisterIdx RegIdx;
- Dependency(unsigned MOIdx, unsigned RegIdx)
+ Dependency(unsigned MOIdx, RegisterIdx RegIdx)
: MOIdx(MOIdx), RegIdx(RegIdx) {}
};
/// This register's rematerializable dependencies, one per unique
@@ -157,6 +174,10 @@ class Rematerializer {
std::pair<MachineInstr *, MachineInstr *>
getRegionUseBounds(unsigned UseRegion, const LiveIntervals &LIS) const;
+ bool isAlive() const {
+ return DefMI && DefMI->getOpcode() != TargetOpcode::DBG_VALUE;
+ }
+
private:
void addUser(MachineInstr *MI, unsigned Region);
void addUsers(const RegionUsers &NewUsers, unsigned Region);
@@ -176,9 +197,8 @@ class Rematerializer {
/// Simply initializes some internal state, does not identify
/// rematerialization candidates.
Rematerializer(MachineFunction &MF,
- SmallVectorImpl<RegionBoundaries> &Regions, LiveIntervals &LIS)
- : Regions(Regions), MRI(MF.getRegInfo()), LIS(LIS),
- TII(*MF.getSubtarget().getInstrInfo()), TRI(TII.getRegisterInfo()) {}
+ SmallVectorImpl<RegionBoundaries> &Regions,
+ LiveIntervals &LIS);
/// Goes through the whole MF and identifies all rematerializable registers.
/// When \p SupportRollback is set, rematerializations of original registers
@@ -187,14 +207,14 @@ class Rematerializer {
/// rematerializable register in regions.
bool analyze(bool SupportRollback);
- inline const Reg &getReg(unsigned RegIdx) const {
+ inline const Reg &getReg(RegisterIdx RegIdx) const {
assert(RegIdx < Regs.size() && "out of bounds");
return Regs[RegIdx];
};
inline ArrayRef<Reg> getRegs() const { return Regs; };
inline unsigned getNumRegs() const { return Regs.size(); };
- inline const RegionBoundaries &getRegion(unsigned RegionIdx) {
+ inline const RegionBoundaries &getRegion(RegisterIdx RegionIdx) {
assert(RegionIdx < Regions.size() && "out of bounds");
return Regions[RegionIdx];
}
@@ -202,18 +222,18 @@ class Rematerializer {
/// Whether register \p RegIdx is a rematerialization of some original
/// register.
- inline bool isRematerializedRegister(unsigned RegIdx) const {
+ inline bool isRematerializedRegister(RegisterIdx RegIdx) const {
assert(RegIdx < Regs.size() && "out of bounds");
return RegIdx >= UnrematableOprds.size();
}
/// Returns the origin index of rematerializable register \p RegIdx.
- inline unsigned getOriginOf(unsigned RematRegIdx) const {
+ inline RegisterIdx getOriginOf(RegisterIdx RematRegIdx) const {
assert(isRematerializedRegister(RematRegIdx) && "not a rematerialization");
return Origins[RematRegIdx - UnrematableOprds.size()];
}
/// If \p RegIdx is a rematerialization, returns its origin's index. If it is
/// an original register's index, returns the same index.
- inline unsigned getOriginOrSelf(unsigned RegIdx) const {
+ inline RegisterIdx getOriginOrSelf(RegisterIdx RegIdx) const {
if (isRematerializedRegister(RegIdx))
return getOriginOf(RegIdx);
return RegIdx;
@@ -259,13 +279,13 @@ class Rematerializer {
/// root's dependency DAG. It follows that the difference in map size before
/// and after rematerialization indicates the number of non-root registers
/// that were rematerialized along the root.
- SmallDenseMap<unsigned, unsigned, 4> DependencyMap;
+ SmallDenseMap<RegisterIdx, RegisterIdx, 4> DependencyMap;
- DependencyReuseInfo &reuse(unsigned DepIdx) {
+ DependencyReuseInfo &reuse(RegisterIdx DepIdx) {
DependencyMap.insert({DepIdx, DepIdx});
return *this;
}
- DependencyReuseInfo &useRemat(unsigned DepIdx, unsigned DepRematIdx) {
+ DependencyReuseInfo &useRemat(RegisterIdx DepIdx, RegisterIdx DepRematIdx) {
DependencyMap.insert({DepIdx, DepRematIdx});
return *this;
}
@@ -280,25 +300,24 @@ class Rematerializer {
/// register, and returns the latter's index. The root's dependency DAG is
/// rematerialized or re-used according to \p DRI.
///
- /// When the method returns, \p DRI contains additional mappings of all
- /// transitive dependencies that had to be rematerialized to their
- /// rematerialization's respective index. References to \ref
- /// Rematerializer::Reg should be considered invalidated by calls to this
- /// method.
- unsigned rematerializeToRegion(unsigned RootIdx, unsigned UseRegion,
- DependencyReuseInfo &DRI);
-
- /// Rematerializes register \p RootIdx to position \p InsertPos and returns
- /// the new register's index. The root's dependency DAG is rematerialized or
- /// re-used according to \p DRI.
+ /// When the method returns, \p DRI contains additional entries for non-root
+ /// registers of the root's dependency DAG that needed to be rematerialized
+ /// along the root. References to \ref Rematerializer::Reg should be
+ /// considered invalidated by calls to this method.
+ RegisterIdx rematerializeToRegion(RegisterIdx RootIdx, unsigned UseRegion,
+ DependencyReuseInfo &DRI);
+
+ /// Rematerializes register \p RootIdx before position \p InsertPos and
+ /// returns the new register's index. The root's dependency DAG is
+ /// rematerialized or re-used according to \p DRI.
///
- /// When the method returns, \p DRI contains additional mappings of all
- /// transitive dependencies that had to be rematerialized to their respective
- /// rematerialization's index. References to \ref Rematerializer::Reg should
- /// be considered invalidated by calls to this method.
- unsigned rematerializeToPos(unsigned RootIdx,
- MachineBasicBlock::iterator InsertPos,
- DependencyReuseInfo &DRI);
+ /// When the method returns, \p DRI contains additional entries for non-root
+ /// registers of the root's dependency DAG that needed to be rematerialized
+ /// along the root. References to \ref Rematerializer::Reg should be
+ /// considered invalidated by calls to this method.
+ RegisterIdx rematerializeToPos(RegisterIdx RootIdx,
+ MachineBasicBlock::iterator InsertPos,
+ DependencyReuseInfo &DRI);
/// Rolls back all rematerializations of original register \p RootIdx,
/// transfering all their users back to it and permanently deleting them from
@@ -307,33 +326,33 @@ class Rematerializer {
/// dependencies of the root register that were fully rematerialized are
/// re-vived at their original positions; this requires that rollback support
/// was set when they were rematerialized.
- void rollbackRematsOf(unsigned RootIdx);
+ void rollbackRematsOf(RegisterIdx RootIdx);
/// Rolls back register \p RematIdx (which must be a rematerialization)
/// transfering all its users back to its origin. The latter is revived if it
/// was fully rematerialized (this requires that rollback support was set at
/// that time).
- void rollback(unsigned RematIdx);
+ void rollback(RegisterIdx RematIdx);
/// Revives original register \p RootIdx at its original position in the MIR
/// if it was fully rematerialized with rollback support set. Transitive
/// dependencies of the root register that were fully rematerialized are
/// revived at their original positions; this requires that rollback support
/// was set when they were themselves rematerialized.
- void reviveRegIfDead(unsigned RootIdx);
+ void reviveRegIfDead(RegisterIdx RootIdx);
/// Transfers all users of register \p FromRegIdx in region \p UseRegion to \p
/// ToRegIdx, the latter of which must be a rematerialization of the former or
/// have the same origin register. Users in \p UseRegion must be reachable
/// from \p ToRegIdx.
- void transferRegionUsers(unsigned FromRegIdx, unsigned ToRegIdx,
+ void transferRegionUsers(RegisterIdx FromRegIdx, RegisterIdx ToRegIdx,
unsigned UseRegion);
/// Transfers user \p UserMI from register \p FromRegIdx to \p ToRegIdx,
/// the latter of which must be a rematerialization of the former or have the
/// same origin register. \p UserMI must be a direct user of \p FromRegIdx. \p
/// UserMI must be reachable from \p ToRegIdx.
- void transferUser(unsigned FromRegIdx, unsigned ToRegIdx,
+ void transferUser(RegisterIdx FromRegIdx, RegisterIdx ToRegIdx,
MachineInstr &UserMI);
/// Recomputes all live intervals that have changed as a result of previous
@@ -344,7 +363,7 @@ class Rematerializer {
/// support rollback.
void commitRematerializations();
- /// Determines whether (sub-)register operand \p MO is has the same value at
+ /// Determines whether (sub-)register operand \p MO has the same value at
/// all \p Uses as at \p MO. This implies that it is also available at all \p
/// Uses according to its current live interval.
bool isMOIdenticalAtUses(MachineOperand &MO, ArrayRef<SlotIndex> Uses) const;
@@ -352,13 +371,13 @@ class Rematerializer {
/// Finds the closest rematerialization of register \p RegIdx in region \p
/// Region that exists before slot \p Before. If no such rematerialization
/// exists, returns \ref Rematerializer::NoReg.
- unsigned findRematInRegion(unsigned RegIdx, unsigned Region,
- SlotIndex Before) const;
+ RegisterIdx findRematInRegion(RegisterIdx RegIdx, unsigned Region,
+ SlotIndex Before) const;
- Printable printDependencyDAG(unsigned RootIdx) const;
- Printable printID(unsigned RegIdx) const;
- Printable printRematReg(unsigned RegIdx, bool SkipRegions = false) const;
- Printable printRegUsers(unsigned RegIdx) const;
+ Printable printDependencyDAG(RegisterIdx RootIdx) const;
+ Printable printID(RegisterIdx RegIdx) const;
+ Printable printRematReg(RegisterIdx RegIdx, bool SkipRegions = false) const;
+ Printable printRegUsers(RegisterIdx RegIdx) const;
Printable printUser(const MachineInstr *MI) const;
private:
@@ -382,30 +401,33 @@ class Rematerializer {
/// order in which they are created. The size of the vector indicates the
/// total number of rematerializations ever created, including those that were
/// deleted or rolled back.
- SmallVector<unsigned> Origins;
+ SmallVector<RegisterIdx> Origins;
+ using RematsOf = SmallDenseSet<RegisterIdx, 4>;
/// Maps original register indices to their currently alive
/// rematerializations. In practice most registers don't have
/// rematerializations so this is represented as a map to lower memory cost.
- DenseMap<unsigned, SmallDenseSet<unsigned, 4>> Rematerializations;
+ DenseMap<RegisterIdx, RematsOf> Rematerializations;
/// Registers mapped to the index of their corresponding rematerialization
/// data in the \ref Regs vector. This includes registers that no longer exist
/// in the MIR.
- DenseMap<Register, unsigned> RegToIdx;
+ DenseMap<Register, RegisterIdx> RegToIdx;
/// Maps all MIs to their parent region. Region terminators are considered
/// part of the region they terminate.
DenseMap<MachineInstr *, unsigned> MIRegion;
/// Set of registers whose live-range may have changed during past
/// rematerializations/rollbacks.
- DenseSet<unsigned> LISUpdates;
+ DenseSet<RegisterIdx> LISUpdates;
/// Keys are fully rematerialized registers whose rematerializations are
/// currently rollback-able. Values map register machine operand indices to
/// their original register.
- DenseMap<unsigned, DenseMap<unsigned, Register>> Rollbackable;
+ DenseMap<RegisterIdx, DenseMap<unsigned, Register>> Revivable;
/// Whether all rematerializations of registers identified during the last
/// analysis phase will be rollback-able.
bool SupportRollback = false;
+ /// During the analysis phase, creates a \ref Rematerializer::Reg object for
+ /// virtual register \p VirtRegIdx if it is rematerializable.
void addRegIfRematerializable(unsigned VirtRegIdx, BitVector &SeenRegs);
/// Determines whether \p MI is considered rematerializable. This further
@@ -423,38 +445,31 @@ class Rematerializer {
/// references to elements within it should be considered invalidated across
/// calls to this method unless the vector can be guaranteed to have enough
/// space for an extra element.
- unsigned rematerializeReg(unsigned RegIdx,
- MachineBasicBlock::iterator InsertPos,
- SmallVectorImpl<Reg::Dependency> &&Dependencies);
+ RegisterIdx rematerializeReg(RegisterIdx RegIdx,
+ MachineBasicBlock::iterator InsertPos,
+ SmallVectorImpl<Reg::Dependency> &&Dependencies);
- /// Internal version of \ref Rematerializer::transferUser that doesn't update
+ /// Implementation of \ref Rematerializer::transferUser that doesn't update
/// register users.
- void transferUserInternal(unsigned FromRegIdx, unsigned ToRegIdx,
- MachineInstr &UserMI);
+ void transferUserImpl(RegisterIdx FromRegIdx, RegisterIdx ToRegIdx,
+ MachineInstr &UserMI);
/// Deletes register \p RootIdx if it no longer has any user. If the register
/// is deleted, recursively deletes any of its transitive rematerializable
/// dependencies that no longer have users as a result.
- bool deleteRegIfUnused(unsigned RootIdx);
+ void deleteRegIfUnused(RegisterIdx RootIdx);
/// Deletes rematerializable register \p RegIdx from the DAG and relevant
/// internal state.
- void deleteReg(unsigned RegIdx);
+ void deleteReg(RegisterIdx RegIdx);
/// If \p MI's first operand defines a register and that register is a
/// rematerializable register tracked by the rematerializer, returns its
/// index in the \ref Regs vector. Otherwise returns \ref
/// Rematerializer::NoReg.
- unsigned getDefRegIdx(const MachineInstr &MI) const;
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- unsigned CallDepth = 0;
- raw_ostream &rdbgs() const {
- for (unsigned I = 0; I < CallDepth; ++I)
- dbgs() << " ";
- return dbgs();
- }
-#endif
+ RegisterIdx getDefRegIdx(const MachineInstr &MI) const;
};
} // namespace llvm
+
+#endif // LLVM_CODEGEN_REMATERIALIZER_H
diff --git a/llvm/lib/CodeGen/Rematerializer.cpp b/llvm/lib/CodeGen/Rematerializer.cpp
index ce439322e26e2..1c28c8afc52eb 100644
--- a/llvm/lib/CodeGen/Rematerializer.cpp
+++ b/llvm/lib/CodeGen/Rematerializer.cpp
@@ -15,6 +15,7 @@
#include "llvm/CodeGen/Rematerializer.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineOperand.h"
@@ -27,15 +28,18 @@
#define DEBUG_TYPE "rematerializer"
using namespace llvm;
+using RegisterIdx = Rematerializer::RegisterIdx;
-static bool isAvailableAtUse(const VNInfo *OVNI, LaneBitmask Mask,
+/// Checks whether the value in \p LI at \p UseIdx is identical to \p OVNI (this
+/// implies it is also live there). When \p LI has sub-ranges, checks that
+/// all sub-ranges intersecting with \p Mask are also live at \p UseIdx.
+static bool isIdenticalAtUse(const VNInfo &OVNI, LaneBitmask Mask,
SlotIndex UseIdx, const LiveInterval &LI) {
- assert(OVNI);
- if (OVNI != LI.getVNInfoAt(UseIdx))
+ if (&OVNI != LI.getVNInfoAt(UseIdx))
return false;
- // Check that subrange is live at user.
if (LI.hasSubRanges()) {
+ // Check that intersecting subranges are live at user.
for (const LiveInterval::SubRange &SR : LI.subranges()) {
if ((SR.LaneMask & Mask).none())
continue;
@@ -51,7 +55,9 @@ static bool isAvailableAtUse(const VNInfo *OVNI, LaneBitmask Mask,
return true;
}
-static Register isRegDependency(const MachineOperand &MO) {
+/// If \p MO is a virtual read register, returns it. Otherwise returns the
+/// sentinel register.
+static Register getRegDependency(const MachineOperand &MO) {
if (!MO.isReg() || !MO.readsReg())
return Register();
Register Reg = MO.getReg();
@@ -63,136 +69,150 @@ static Register isRegDependency(const MachineOperand &MO) {
return Reg;
}
-unsigned Rematerializer::rematerializeToRegion(unsigned RootIdx,
- unsigned UseRegion,
- DependencyReuseInfo &DRI) {
-
+RegisterIdx Rematerializer::rematerializeToRegion(RegisterIdx RootIdx,
+ unsigned UseRegion,
+ DependencyReuseInfo &DRI) {
MachineInstr *FirstMI =
getReg(RootIdx).getRegionUseBounds(UseRegion, LIS).first;
- unsigned NewRegIdx = rematerializeToPos(RootIdx, FirstMI, DRI);
+ RegisterIdx NewRegIdx = rematerializeToPos(RootIdx, FirstMI, DRI);
transferRegionUsers(RootIdx, NewRegIdx, UseRegion);
return NewRegIdx;
}
-unsigned
-Rematerializer::rematerializeToPos(unsigned RootIdx,
+RegisterIdx
+Rematerializer::rematerializeToPos(RegisterIdx RootIdx,
MachineBasicBlock::iterator InsertPos,
DependencyReuseInfo &DRI) {
- LLVM_DEBUG({
- rdbgs() << "Rematerializing " << printID(RootIdx) << " to "
- << printUser(&*InsertPos) << '\n';
- ++CallDepth;
- });
-
- // Create/identify dependencies for the new register. Copy the dependencies
- // vector because underlying updates to the backing vector of registers may
- // invalidate references.
- SmallVector<Reg::Dependency, 2> NewDeps, Deps(Regs[RootIdx].Dependencies);
- for (const Reg::Dependency &Dep : Deps) {
- if (auto NewDep = DRI.DependencyMap.find(Dep.RegIdx);
- NewDep != DRI.DependencyMap.end()) {
- // We already have the version of the dependency we want to use.
- NewDeps.emplace_back(Dep.MOIdx, NewDep->second);
- } else {
- // Dependencies must be rematerialized in def-use order.
- unsigned NewDepIdx = rematerializeToPos(Dep.RegIdx, InsertPos, DRI);
- DRI.DependencyMap.insert({Dep.RegIdx, NewDepIdx});
- NewDeps.emplace_back(Dep.MOIdx, NewDepIdx);
+ assert(!DRI.DependencyMap.contains(RootIdx));
+ LLVM_DEBUG(dbgs() << "Rematerializing " << printID(RootIdx) << " to "
+ << printUser(&*InsertPos) << '\n');
+
+ // Traverse the root's dependency DAG depth-first to find the set of
+ // registers we must rematerialize along with it and a legal order to
+ // rematerialize them in.
+ SmallVector<RegisterIdx, 4> DepDAG{RootIdx};
+ SmallSetVector<RegisterIdx, 8> RematOrder;
+ RematOrder.insert(RootIdx);
+ do {
+ RegisterIdx RegIdx = DepDAG.pop_back_val();
+ for (const Reg::Dependency &Dep : getReg(RegIdx).Dependencies) {
+ // The dependency may already have a rematerialization ready to use.
+ if (DRI.DependencyMap.contains(Dep.RegIdx))
+ continue;
+ // We may have already seen the dependency in the dependency DAG.
+ if (RematOrder.contains(Dep.RegIdx))
+ continue;
+ DepDAG.push_back(Dep.RegIdx);
+ RematOrder.insert(Dep.RegIdx);
}
+ } while (!DepDAG.empty());
+
+ // Rematerialize all necessary registers in the root's dependency DAG. At each
+ // rematerialization, dependencies should already be available.
+ RegisterIdx LastNewIdx;
+ for (RegisterIdx RegIdx : reverse(RematOrder)) {
+ assert(!DRI.DependencyMap.contains(RegIdx) && "useless remat");
+ SmallVector<Reg::Dependency, 2> Dependencies;
+ for (const Reg::Dependency &Dep : getReg(RegIdx).Dependencies)
+ Dependencies.emplace_back(Dep.MOIdx, DRI.DependencyMap.at(Dep.RegIdx));
+ LastNewIdx = rematerializeReg(RegIdx, InsertPos, std::move(Dependencies));
+ DRI.DependencyMap.insert({RegIdx, LastNewIdx});
}
- LLVM_DEBUG(--CallDepth);
- return rematerializeReg(RootIdx, InsertPos, std::move(NewDeps));
+ return LastNewIdx;
}
-void Rematerializer::rollbackRematsOf(unsigned RootIdx) {
+void Rematerializer::rollbackRematsOf(RegisterIdx RootIdx) {
auto Remats = Rematerializations.find(RootIdx);
if (Remats == Rematerializations.end())
return;
- LLVM_DEBUG({
- rdbgs() << "Rolling back rematerializations of " << printID(RootIdx)
- << '\n';
- });
+ LLVM_DEBUG(dbgs() << "Rolling back rematerializations of " << printID(RootIdx)
+ << '\n');
reviveRegIfDead(RootIdx);
// All of the rematerialization's users must use the revived register.
- for (unsigned RematRegIdx : Remats->getSecond()) {
+ for (RegisterIdx RematRegIdx : Remats->getSecond()) {
for (const auto &[UseRegion, RegionUsers] : Regs[RematRegIdx].Uses)
transferRegionUsers(RematRegIdx, RootIdx, UseRegion);
}
Rematerializations.erase(RootIdx);
- LLVM_DEBUG({
- rdbgs() << "** Rolled back rematerializations of " << printID(RootIdx)
- << '\n';
- });
+ LLVM_DEBUG(dbgs() << "** Rolled back rematerializations of "
+ << printID(RootIdx) << '\n');
}
-void Rematerializer::rollback(unsigned RematIdx) {
- assert(getReg(RematIdx).DefMI && !Rollbackable.contains(RematIdx) &&
+void Rematerializer::rollback(RegisterIdx RematIdx) {
+ assert(getReg(RematIdx).DefMI && !Revivable.contains(RematIdx) &&
"cannot rollback dead register");
- const unsigned OriginRegIdx = getOriginOf(RematIdx);
+ const RegisterIdx OriginRegIdx = getOriginOf(RematIdx);
reviveRegIfDead(OriginRegIdx);
for (const auto &[UseRegion, RegionUsers] : Regs[RematIdx].Uses)
transferRegionUsers(RematIdx, OriginRegIdx, UseRegion);
}
-void Rematerializer::reviveRegIfDead(unsigned RootIdx) {
- assert(!isRematerializedRegister(RootIdx) &&
- "cannot revive rematerialization");
-
- Reg &Root = Regs[RootIdx];
- if (!Root.Uses.empty()) {
- // The register still exists, nothing to do.
- LLVM_DEBUG(rdbgs() << printID(RootIdx) << " still exists\n");
+void Rematerializer::reviveRegIfDead(RegisterIdx RootIdx) {
+ if (getReg(RootIdx).isAlive())
return;
- }
+ assert(Revivable.contains(RootIdx) && "not revivable");
+
+ // Traverse the root's dependency DAG depth-first to find the set of
+ // registers we must revive and a legal order to revive them in.
+ SmallVector<RegisterIdx, 4> DepDAG{RootIdx};
+ SmallSetVector<RegisterIdx, 8> ReviveOrder;
+ ReviveOrder.insert(RootIdx);
+ do {
+ // All dependencies of a revived register need to be alive too.
+ const Reg &ReviveReg = getReg(DepDAG.pop_back_val());
+ for (const Reg::Dependency &Dep : ReviveReg.Dependencies) {
+ // We may have already seen the dependency in the dependency DAG.
+ if (ReviveOrder.contains(Dep.RegIdx))
+ continue;
- assert(Rollbackable.contains(RootIdx) && "not marked rollbackable");
- assert(Root.DefMI && Root.DefMI->getOpcode() == TargetOpcode::DBG_VALUE &&
- "not the right opcode");
- assert(Rematerializations.contains(RootIdx) && "no remats");
+ // Dead dependencies need to be revived.
+ Reg &DepReg = Regs[Dep.RegIdx];
+ if (!DepReg.isAlive()) {
+ assert(Revivable.contains(Dep.RegIdx) && "not revivable");
+ ReviveOrder.insert(Dep.RegIdx);
+ DepDAG.push_back(Dep.RegIdx);
+ }
- LLVM_DEBUG({
- rdbgs() << "Reviving " << printID(RootIdx) << '\n';
- ++CallDepth;
- });
+ // All dependencies get a new user (the revived register).
+ DepReg.addUser(ReviveReg.DefMI, ReviveReg.DefRegion);
+ LISUpdates.insert(Dep.RegIdx);
+ }
+ } while (!DepDAG.empty());
+
+ for (RegisterIdx RegIdx : reverse(ReviveOrder)) {
+ // Pick any rematerialization to retrieve the original opcode from.
+ Reg &ReviveReg = Regs[RegIdx];
+ assert(Rematerializations.contains(RegIdx) && "no remats");
+ RegisterIdx RematIdx = *Rematerializations.at(RegIdx).begin();
+ ReviveReg.DefMI->setDesc(getReg(RematIdx).DefMI->getDesc());
+ for (const auto &[MOIdx, Reg] : Revivable.at(RegIdx))
+ ReviveReg.DefMI->getOperand(MOIdx).setReg(Reg);
+ Revivable.erase(RegIdx);
+ LISUpdates.insert(RegIdx);
- // Fully rematerialized dependencies need to be revived. All dependencies gain
- // a new user.
- for (const Reg::Dependency &Dep : Root.Dependencies) {
- reviveRegIfDead(Dep.RegIdx);
- Regs[Dep.RegIdx].addUser(Root.DefMI, Root.DefRegion);
- LISUpdates.insert(Dep.RegIdx);
+ LLVM_DEBUG({
+ dbgs() << "** Revived " << printID(RegIdx) << " @ ";
+ LIS.getInstructionIndex(*ReviveReg.DefMI).print(dbgs());
+ dbgs() << '\n';
+ });
}
-
- // Pick any rematerialization to retrieve the original opcode from.
- unsigned RematIdx = *Rematerializations.at(RootIdx).begin();
- Root.DefMI->setDesc(getReg(RematIdx).DefMI->getDesc());
- for (const auto &[MOIdx, Reg] : Rollbackable.at(RootIdx))
- Root.DefMI->getOperand(MOIdx).setReg(Reg);
- Rollbackable.erase(RootIdx);
- LISUpdates.insert(RootIdx);
-
- LLVM_DEBUG({
- rdbgs() << "** Revived " << printID(RootIdx) << " @ ";
- LIS.getInstructionIndex(*Root.DefMI).print(dbgs());
- dbgs() << '\n';
- --CallDepth;
- });
}
-void Rematerializer::transferUser(unsigned FromRegIdx, unsigned ToRegIdx,
+void Rematerializer::transferUser(RegisterIdx FromRegIdx, RegisterIdx ToRegIdx,
MachineInstr &UserMI) {
- transferUserInternal(FromRegIdx, ToRegIdx, UserMI);
- unsigned UserRegion = MIRegion[&UserMI];
+ transferUserImpl(FromRegIdx, ToRegIdx, UserMI);
+ unsigned UserRegion = MIRegion.at(&UserMI);
Regs[FromRegIdx].eraseUser(&UserMI, UserRegion);
Regs[ToRegIdx].addUser(&UserMI, UserRegion);
deleteRegIfUnused(FromRegIdx);
}
-void Rematerializer::transferRegionUsers(unsigned FromRegIdx, unsigned ToRegIdx,
+void Rematerializer::transferRegionUsers(RegisterIdx FromRegIdx,
+ RegisterIdx ToRegIdx,
unsigned UseRegion) {
auto &FromRegUsers = Regs[FromRegIdx].Uses;
auto UsesIt = FromRegUsers.find(UseRegion);
@@ -201,15 +221,15 @@ void Rematerializer::transferRegionUsers(unsigned FromRegIdx, unsigned ToRegIdx,
const SmallDenseSet<MachineInstr *, 4> &RegionUsers = UsesIt->getSecond();
for (MachineInstr *UserMI : RegionUsers)
- transferUserInternal(FromRegIdx, ToRegIdx, *UserMI);
+ transferUserImpl(FromRegIdx, ToRegIdx, *UserMI);
Regs[ToRegIdx].addUsers(RegionUsers, UseRegion);
FromRegUsers.erase(UseRegion);
deleteRegIfUnused(FromRegIdx);
}
-void Rematerializer::transferUserInternal(unsigned FromRegIdx,
- unsigned ToRegIdx,
- MachineInstr &UserMI) {
+void Rematerializer::transferUserImpl(RegisterIdx FromRegIdx,
+ RegisterIdx ToRegIdx,
+ MachineInstr &UserMI) {
assert(MIRegion.contains(&UserMI) && "unknown user");
assert(getReg(FromRegIdx).Uses.at(MIRegion.at(&UserMI)).contains(&UserMI) &&
"not a user");
@@ -217,9 +237,8 @@ void Rematerializer::transferUserInternal(unsigned FromRegIdx,
assert(getOriginOrSelf(FromRegIdx) == getOriginOrSelf(ToRegIdx) &&
"unrelated registers");
- LLVM_DEBUG(rdbgs() << "User transfer from " << printID(FromRegIdx) << " to "
- << printID(ToRegIdx) << ": " << printUser(&UserMI)
- << '\n');
+ LLVM_DEBUG(dbgs() << "User transfer from " << printID(FromRegIdx) << " to "
+ << printID(ToRegIdx) << ": " << printUser(&UserMI) << '\n');
UserMI.substituteRegister(getReg(FromRegIdx).getDefReg(),
getReg(ToRegIdx).getDefReg(), 0, TRI);
@@ -228,7 +247,7 @@ void Rematerializer::transferUserInternal(unsigned FromRegIdx,
// If the user is rematerializable, we must change its dependency to the
// new register.
- if (unsigned UserRegIdx = getDefRegIdx(UserMI); UserRegIdx != NoReg) {
+ if (RegisterIdx UserRegIdx = getDefRegIdx(UserMI); UserRegIdx != NoReg) {
// Look for the user's dependency that matches the register.
for (Reg::Dependency &Dep : Regs[UserRegIdx].Dependencies) {
if (Dep.RegIdx == FromRegIdx) {
@@ -242,9 +261,9 @@ void Rematerializer::transferUserInternal(unsigned FromRegIdx,
void Rematerializer::updateLiveIntervals() {
DenseSet<Register> SeenUnrematRegs;
- for (unsigned RegIdx : LISUpdates) {
+ for (RegisterIdx RegIdx : LISUpdates) {
const Reg &UpdateReg = getReg(RegIdx);
- assert(UpdateReg.DefMI || Rollbackable.contains(RegIdx) && "dead register");
+ assert((UpdateReg.DefMI || Revivable.contains(RegIdx)) && "dead reg");
Register DefReg = UpdateReg.getDefReg();
if (LIS.hasInterval(DefReg))
@@ -252,9 +271,9 @@ void Rematerializer::updateLiveIntervals() {
LIS.createAndComputeVirtRegInterval(DefReg);
LLVM_DEBUG({
- rdbgs() << "Re-computed interval for " << printID(RegIdx) << ": ";
+ dbgs() << "Re-computed interval for " << printID(RegIdx) << ": ";
LIS.getInterval(DefReg).print(dbgs());
- rdbgs() << '\n' << printRegUsers(RegIdx);
+ dbgs() << '\n' << printRegUsers(RegIdx);
});
// Update intervals for unrematerializable operands.
@@ -276,9 +295,9 @@ void Rematerializer::updateLiveIntervals() {
}
void Rematerializer::commitRematerializations() {
- for (auto &[RegIdx, _] : Rollbackable)
+ for (auto &[RegIdx, _] : Revivable)
deleteReg(RegIdx);
- Rollbackable.clear();
+ Revivable.clear();
}
bool Rematerializer::isMOIdenticalAtUses(MachineOperand &MO,
@@ -293,22 +312,23 @@ bool Rematerializer::isMOIdenticalAtUses(MachineOperand &MO,
const VNInfo *DefVN =
LI.getVNInfoAt(LIS.getInstructionIndex(*MO.getParent()).getRegSlot(true));
for (SlotIndex Use : Uses) {
- if (!isAvailableAtUse(DefVN, Mask, Use, LI))
+ if (!isIdenticalAtUse(*DefVN, Mask, Use, LI))
return false;
}
return true;
}
-unsigned Rematerializer::findRematInRegion(unsigned RegIdx, unsigned Region,
- SlotIndex Before) const {
+RegisterIdx Rematerializer::findRematInRegion(RegisterIdx RegIdx,
+ unsigned Region,
+ SlotIndex Before) const {
auto It = Rematerializations.find(getOriginOrSelf(RegIdx));
if (It == Rematerializations.end())
return NoReg;
- const SmallDenseSet<unsigned, 4> &Remats = It->getSecond();
+ const RematsOf &Remats = It->getSecond();
SlotIndex BestSlot;
- unsigned BestRegIdx = NoReg;
- for (unsigned RematRegIdx : Remats) {
+ RegisterIdx BestRegIdx = NoReg;
+ for (RegisterIdx RematRegIdx : Remats) {
const Reg &RematReg = getReg(RematRegIdx);
if (RematReg.DefRegion != Region || RematReg.Uses.empty())
continue;
@@ -323,67 +343,100 @@ unsigned Rematerializer::findRematInRegion(unsigned RegIdx, unsigned Region,
return BestRegIdx;
}
-bool Rematerializer::deleteRegIfUnused(unsigned RootIdx) {
- Reg &Root = Regs[RootIdx];
- if (!Root.Uses.empty())
- return false;
- LLVM_DEBUG({
- rdbgs() << "Deleting " << printID(RootIdx) << " with no users\n";
- ++CallDepth;
- });
-
- Register DefReg = Root.getDefReg();
- for (const Reg::Dependency &Dep : Root.Dependencies) {
- LLVM_DEBUG(rdbgs() << "Deleting user from " << printID(Dep.RegIdx) << "\n");
- Regs[Dep.RegIdx].eraseUser(Root.DefMI, Root.DefRegion);
- deleteRegIfUnused(Dep.RegIdx);
- }
+void Rematerializer::deleteRegIfUnused(RegisterIdx RootIdx) {
+ if (!getReg(RootIdx).Uses.empty())
+ return;
- LIS.removeInterval(DefReg);
- LISUpdates.erase(RootIdx);
- if (SupportRollback && !isRematerializedRegister(RootIdx)) {
- // Replace all read registers with the null one to prevent them from showing
- // up in use-lists, which is disallowed for debug instructions in live
- // interval calculations. Store mappings between operand indices and
- // original registers for potential rollback.
- DenseMap<unsigned, Register> &RegMap =
- Rollbackable.try_emplace(RootIdx).first->getSecond();
- for (auto [Idx, MO] : enumerate(Root.DefMI->operands())) {
- if (MO.isReg() && MO.readsReg()) {
- RegMap.insert({Idx, MO.getReg()});
- MO.setReg(Register());
+ // Traverse the root's dependency DAG depth-first to find the set of registers
+ // we can delete and a legal order to delete them in.
+ SmallVector<RegisterIdx, 4> DepDAG{RootIdx};
+ SmallSetVector<RegisterIdx, 8> DeleteOrder;
+ DeleteOrder.insert(RootIdx);
+ do {
+ // A deleted register's dependencies may be deletable too.
+ const Reg &DeleteReg = getReg(DepDAG.pop_back_val());
+ for (const Reg::Dependency &Dep : DeleteReg.Dependencies) {
+ // All dependencies lose a user (the deleted register).
+ Reg &DepReg = Regs[Dep.RegIdx];
+ DepReg.eraseUser(DeleteReg.DefMI, DeleteReg.DefRegion);
+ if (DepReg.Uses.empty()) {
+ DeleteOrder.insert(Dep.RegIdx);
+ DepDAG.push_back(Dep.RegIdx);
}
}
- Root.DefMI->setDesc(TII.get(TargetOpcode::DBG_VALUE));
- } else {
- deleteReg(RootIdx);
- }
- if (isRematerializedRegister(RootIdx)) {
- SmallDenseSet<unsigned, 4> &Remats =
- Rematerializations.at(getOriginOf(RootIdx));
- assert(Remats.contains(RootIdx) && "broken link between remat and origin");
- Remats.erase(RootIdx);
- if (Remats.empty())
- Rematerializations.erase(RootIdx);
+ } while (!DepDAG.empty());
+
+ for (RegisterIdx RegIdx : reverse(DeleteOrder)) {
+ Reg &DeleteReg = Regs[RegIdx];
+ LIS.removeInterval(DeleteReg.getDefReg());
+ LISUpdates.erase(RegIdx);
+ const bool IsRematerializedReg = isRematerializedRegister(RegIdx);
+ if (SupportRollback && !IsRematerializedReg) {
+ // Replace all read registers with the null one to prevent them from
+ // showing up in use-lists, which is disallowed for debug instructions in
+ // live interval calculations. Store mappings between operand indices and
+ // original registers for potential rollback.
+ DenseMap<unsigned, Register> &RegMap =
+ Revivable.try_emplace(RegIdx).first->getSecond();
+ for (auto [Idx, MO] : enumerate(DeleteReg.DefMI->operands())) {
+ if (MO.isReg() && MO.readsReg()) {
+ RegMap.insert({Idx, MO.getReg()});
+ MO.setReg(Register());
+ }
+ }
+ DeleteReg.DefMI->setDesc(TII.get(TargetOpcode::DBG_VALUE));
+ } else {
+ deleteReg(RegIdx);
+ }
+ if (IsRematerializedReg) {
+ // Delete rematerialized register from its origin's rematerializations.
+ RematsOf &OriginRemats = Rematerializations.at(getOriginOf(RegIdx));
+ assert(OriginRemats.contains(RegIdx) && "broken remat<->origin link");
+ OriginRemats.erase(RegIdx);
+ if (OriginRemats.empty())
+ Rematerializations.erase(RegIdx);
+ }
+ LLVM_DEBUG(dbgs() << "** Deleted " << printID(RegIdx) << "\n");
}
- LLVM_DEBUG(--CallDepth);
- return true;
}
-void Rematerializer::deleteReg(unsigned RegIdx) {
+void Rematerializer::deleteReg(RegisterIdx RegIdx) {
Reg &DeleteReg = Regs[RegIdx];
assert(DeleteReg.DefMI && "register was already deleted");
// It is not possible for the deleted instruction to be the upper region
// boundary since we don't ever consider them rematerializable.
- if (Regions[DeleteReg.DefRegion].first == DeleteReg.DefMI)
- Regions[DeleteReg.DefRegion].first =
- std::next(MachineBasicBlock::iterator(DeleteReg.DefMI));
+ MachineBasicBlock::iterator &RegionBegin = Regions[DeleteReg.DefRegion].first;
+ if (RegionBegin == DeleteReg.DefMI)
+ RegionBegin = std::next(MachineBasicBlock::iterator(DeleteReg.DefMI));
LIS.RemoveMachineInstrFromMaps(*DeleteReg.DefMI);
DeleteReg.DefMI->eraseFromParent();
MIRegion.erase(DeleteReg.DefMI);
DeleteReg.DefMI = nullptr;
}
+Rematerializer::Rematerializer(MachineFunction &MF,
+ SmallVectorImpl<RegionBoundaries> &Regions,
+ LiveIntervals &LIS)
+ : Regions(Regions), MRI(MF.getRegInfo()), LIS(LIS),
+ TII(*MF.getSubtarget().getInstrInfo()), TRI(TII.getRegisterInfo()) {
+#ifdef EXPENSIVE_CHECKS
+ // Check that regions are valid.
+ DenseSet<MachineInstr *> SeenMIs;
+ for (const auto &[RegionBegin, RegionEnd] : Regions) {
+ assert(RegionBegin != RegionEnd && "empty region");
+ for (auto MI = RegionBegin; MI != RegionEnd; ++MI) {
+ bool IsNewMI = SeenMIs.insert(&*MI).second;
+ assert(IsNewMI && "overlapping regions");
+ assert(!MI->isTerminator() && "terminator in region");
+ }
+ if (RegionEnd != RegionBegin->getParent()->end()) {
+ bool IsNewMI = SeenMIs.insert(&*RegionEnd).second;
+ assert(IsNewMI && "overlapping regions (upper bound)");
+ }
+ }
+#endif
+}
+
bool Rematerializer::analyze(bool SupportRollback) {
Regs.clear();
UnrematableOprds.clear();
@@ -392,14 +445,13 @@ bool Rematerializer::analyze(bool SupportRollback) {
MIRegion.clear();
RegToIdx.clear();
LISUpdates.clear();
- Rollbackable.clear();
+ Revivable.clear();
this->SupportRollback = SupportRollback;
if (Regions.empty())
return false;
// Initialize MI to containing region mapping.
- const unsigned NumRegions = Regions.size();
- for (unsigned I = 0; I < NumRegions; ++I) {
+ for (unsigned I = 0, E = Regions.size(); I < E; ++I) {
RegionBoundaries Region = Regions[I];
assert(Region.first != Region.second && "empty cannot be region");
for (auto MI = Region.first; MI != Region.second; ++MI) {
@@ -424,7 +476,7 @@ bool Rematerializer::analyze(bool SupportRollback) {
assert(Regs.size() == UnrematableOprds.size());
LLVM_DEBUG({
- for (unsigned I = 0, E = getNumRegs(); I < E; ++I)
+ for (RegisterIdx I = 0, E = getNumRegs(); I < E; ++I)
dbgs() << printDependencyDAG(I) << '\n';
});
return !Regs.empty();
@@ -470,7 +522,7 @@ void Rematerializer::addRegIfRematerializable(unsigned VirtRegIdx,
SmallDenseSet<Register, 4> AllDepRegs;
SmallVector<unsigned, 2> UnrematDeps;
for (const auto &[MOIdx, MO] : enumerate(RematReg.DefMI->operands())) {
- Register DepReg = isRegDependency(MO);
+ Register DepReg = getRegDependency(MO);
if (!DepReg || !AllDepRegs.insert(DepReg).second)
continue;
unsigned DepRegIdx = DepReg.virtRegIndex();
@@ -508,7 +560,7 @@ bool Rematerializer::isMIRematerializable(const MachineInstr &MI) const {
return true;
}
-unsigned Rematerializer::getDefRegIdx(const MachineInstr &MI) const {
+RegisterIdx Rematerializer::getDefRegIdx(const MachineInstr &MI) const {
if (!MI.getNumOperands() || !MI.getOperand(0).isReg() ||
MI.getOperand(0).readsReg())
return NoReg;
@@ -519,11 +571,11 @@ unsigned Rematerializer::getDefRegIdx(const MachineInstr &MI) const {
return UserRegIt->second;
}
-unsigned Rematerializer::rematerializeReg(
- unsigned RegIdx, MachineBasicBlock::iterator InsertPos,
+RegisterIdx Rematerializer::rematerializeReg(
+ RegisterIdx RegIdx, MachineBasicBlock::iterator InsertPos,
SmallVectorImpl<Reg::Dependency> &&Dependencies) {
unsigned UseRegion = MIRegion.at(&*InsertPos);
- unsigned NewRegIdx = Regs.size();
+ RegisterIdx NewRegIdx = Regs.size();
Reg &NewReg = Regs.emplace_back();
Reg &FromReg = Regs[RegIdx];
@@ -534,7 +586,7 @@ unsigned Rematerializer::rematerializeReg(
// Track rematerialization link between registers. Origins are always
// registers that existed originally, and rematerializations are always
// attached to them.
- unsigned OriginIdx =
+ RegisterIdx OriginIdx =
isRematerializedRegister(RegIdx) ? getOriginOf(RegIdx) : RegIdx;
Origins.push_back(OriginIdx);
Rematerializations[OriginIdx].insert(NewRegIdx);
@@ -560,9 +612,9 @@ unsigned Rematerializer::rematerializeReg(
auto ZipedDeps = zip_equal(FromReg.Dependencies, NewReg.Dependencies);
for (const auto &[OldDep, NewDep] : ZipedDeps) {
assert(OldDep.MOIdx == NewDep.MOIdx && "operand mismatch");
- LLVM_DEBUG(rdbgs() << " Operand #" << OldDep.MOIdx << ": "
- << printID(OldDep.RegIdx) << " -> "
- << printID(NewDep.RegIdx) << '\n');
+ LLVM_DEBUG(dbgs() << " Operand #" << OldDep.MOIdx << ": "
+ << printID(OldDep.RegIdx) << " -> "
+ << printID(NewDep.RegIdx) << '\n');
Reg &NewDepReg = Regs[NewDep.RegIdx];
if (OldDep.RegIdx != NewDep.RegIdx) {
@@ -576,8 +628,8 @@ unsigned Rematerializer::rematerializeReg(
}
LLVM_DEBUG({
- rdbgs() << "** Rematerialized " << printID(RegIdx) << " as "
- << printRematReg(NewRegIdx) << '\n';
+ dbgs() << "** Rematerialized " << printID(RegIdx) << " as "
+ << printRematReg(NewRegIdx) << '\n';
});
return NewRegIdx;
}
@@ -629,11 +681,11 @@ void Rematerializer::Reg::eraseUser(MachineInstr *MI, unsigned Region) {
RUsers.erase(MI);
}
-Printable Rematerializer::printDependencyDAG(unsigned RootIdx) const {
+Printable Rematerializer::printDependencyDAG(RegisterIdx RootIdx) const {
return Printable([&, RootIdx](raw_ostream &OS) {
- DenseMap<unsigned, unsigned> RegDepths;
- std::function<void(unsigned, unsigned)> WalkTree =
- [&](unsigned RegIdx, unsigned Depth) -> void {
+ DenseMap<RegisterIdx, unsigned> RegDepths;
+ std::function<void(RegisterIdx, unsigned)> WalkTree =
+ [&](RegisterIdx RegIdx, unsigned Depth) -> void {
unsigned MaxDepth = std::max(RegDepths.lookup_or(RegIdx, Depth), Depth);
RegDepths.emplace_or_assign(RegIdx, MaxDepth);
for (const Reg::Dependency &Dep : getReg(RegIdx).Dependencies)
@@ -642,8 +694,8 @@ Printable Rematerializer::printDependencyDAG(unsigned RootIdx) const {
WalkTree(RootIdx, 0);
// Sort in decreasing depth order to print root at the bottom.
- SmallVector<std::pair<unsigned, unsigned>> Regs(RegDepths.begin(),
- RegDepths.end());
+ SmallVector<std::pair<RegisterIdx, unsigned>> Regs(RegDepths.begin(),
+ RegDepths.end());
sort(Regs, [](const auto &LHS, const auto &RHS) {
return LHS.second > RHS.second;
});
@@ -657,7 +709,7 @@ Printable Rematerializer::printDependencyDAG(unsigned RootIdx) const {
});
}
-Printable Rematerializer::printID(unsigned RegIdx) const {
+Printable Rematerializer::printID(RegisterIdx RegIdx) const {
return Printable([&, RegIdx](raw_ostream &OS) {
const Reg &PrintReg = getReg(RegIdx);
OS << '(' << RegIdx << '/';
@@ -671,7 +723,7 @@ Printable Rematerializer::printID(unsigned RegIdx) const {
});
}
-Printable Rematerializer::printRematReg(unsigned RegIdx,
+Printable Rematerializer::printRematReg(RegisterIdx RegIdx,
bool SkipRegions) const {
return Printable([&, RegIdx, SkipRegions](raw_ostream &OS) {
const Reg &PrintReg = getReg(RegIdx);
@@ -712,7 +764,7 @@ Printable Rematerializer::printRematReg(unsigned RegIdx,
});
}
-Printable Rematerializer::printRegUsers(unsigned RegIdx) const {
+Printable Rematerializer::printRegUsers(RegisterIdx RegIdx) const {
return Printable([&, RegIdx](raw_ostream &OS) {
for (const auto &[_, Users] : getReg(RegIdx).Uses) {
for (MachineInstr *MI : Users)
@@ -723,7 +775,7 @@ Printable Rematerializer::printRegUsers(unsigned RegIdx) const {
Printable Rematerializer::printUser(const MachineInstr *MI) const {
return Printable([&, MI](raw_ostream &OS) {
- unsigned RegIdx = getDefRegIdx(*MI);
+ RegisterIdx RegIdx = getDefRegIdx(*MI);
if (RegIdx != NoReg)
OS << printID(RegIdx);
else
diff --git a/llvm/unittests/CodeGen/RematerializerTest.cpp b/llvm/unittests/CodeGen/RematerializerTest.cpp
index af16774f7b1d0..a697a4a7f3312 100644
--- a/llvm/unittests/CodeGen/RematerializerTest.cpp
+++ b/llvm/unittests/CodeGen/RematerializerTest.cpp
@@ -28,6 +28,7 @@
#include <memory>
using namespace llvm;
+using RegisterIdx = Rematerializer::RegisterIdx;
class RematerializerTest : public testing::Test {
public:
@@ -35,9 +36,11 @@ class RematerializerTest : public testing::Test {
std::unique_ptr<TargetMachine> TM;
std::unique_ptr<Module> M;
std::unique_ptr<MachineModuleInfo> MMI;
+
std::unique_ptr<MIRParser> MIR;
std::unique_ptr<SmallVector<Rematerializer::RegionBoundaries>> Regions;
std::unique_ptr<Rematerializer> Remater;
+ MachineFunction *MF;
LoopAnalysisManager LAM;
MachineFunctionAnalysisManager MFAM;
@@ -77,7 +80,8 @@ class RematerializerTest : public testing::Test {
MAM.registerPass([&] { return MachineModuleAnalysis(*MMI); });
}
- bool parseMIR(StringRef MIRCode) {
+ bool parseMIRAndInit(StringRef MIRCode, StringRef FunName,
+ bool SupportRollback) {
SMDiagnostic Diagnostic;
std::unique_ptr<MemoryBuffer> MBuffer = MemoryBuffer::getMemBuffer(MIRCode);
MIR = createMIRParser(std::move(MBuffer), Context);
@@ -92,29 +96,41 @@ class RematerializerTest : public testing::Test {
return false;
}
- return true;
- }
-
- Rematerializer &getRematerializer(StringRef MIR, StringRef FunName,
- bool SupportRollback) {
- MachineFunction &MF =
- FAM.getResult<MachineFunctionAnalysis>(*M->getFunction(FunName))
- .getMF();
- LiveIntervals &LIS = MFAM.getResult<LiveIntervalsAnalysis>(MF);
+ MF = &FAM.getResult<MachineFunctionAnalysis>(*M->getFunction(FunName))
+ .getMF();
+ LiveIntervals &LIS = MFAM.getResult<LiveIntervalsAnalysis>(*MF);
+ // Create regions for the rematerializer. Both MBBs and terminator MIs
+ // delimitate regions.
Regions = std::make_unique<SmallVector<Rematerializer::RegionBoundaries>>();
- /// Each MBB is its own region. This wouldn't be how e.g., the scheduler
- /// would do that but here we only want to test the rematerializer's API so
- /// it is good enough.
- for (MachineBasicBlock &MBB : MF)
- Regions->push_back({MBB.begin(), MBB.end()});
- Remater = std::make_unique<Rematerializer>(MF, *Regions, LIS);
+ MachineInstr *FirstMI = nullptr;
+ for (MachineBasicBlock &MBB : *MF) {
+ for (MachineInstr &MI : MBB) {
+ if (!FirstMI)
+ FirstMI = &MI;
+ if (MI.isTerminator()) {
+ if (FirstMI != &MI)
+ Regions->push_back({FirstMI, MI});
+ FirstMI = nullptr;
+ }
+ }
+ // End the region at the end of the block.
+ if (FirstMI) {
+ Regions->push_back({FirstMI, MBB.end()});
+ FirstMI = nullptr;
+ }
+ }
+
+ Remater = std::make_unique<Rematerializer>(*MF, *Regions, LIS);
Remater->analyze(SupportRollback);
- return *Remater;
+ return true;
}
+ MachineFunction &getMF() { return *MF; }
+ Rematerializer &getRematerializer() { return *Remater; }
+
/// Returns the number of users of register \p RegIdx.
- unsigned getNumUsers(unsigned RegIdx) {
+ unsigned getNumUsers(RegisterIdx RegIdx) {
unsigned NumUsers = 0;
for (const auto &[_, RegionUses] : Remater->getReg(RegIdx).Uses)
NumUsers += RegionUses.size();
@@ -122,25 +138,23 @@ class RematerializerTest : public testing::Test {
}
/// Returns the size of region \p RegionIdx.
- unsigned getNumRegions(unsigned RegionIdx) {
+ unsigned getRegionSize(unsigned RegionIdx) {
const Rematerializer::RegionBoundaries &Region = (*Regions)[RegionIdx];
return std::distance(Region.first, Region.second);
}
};
-using MBBRegionsVector = SmallVector<SchedRegion, 16>;
-
/// Asserts that region RegionIdx contains RegionSize instructions.
#define ASSERT_REGION_SIZE(RegionIdx, RegionSize) \
- ASSERT_EQ(getNumRegions(RegionIdx), RegionSize)
+ ASSERT_EQ(getRegionSize(RegionIdx), RegionSize)
/// Asserts that regions have sizes RegionSizes, which must be an iterable
/// object with the same number of elements as the number of regions.
#define ASSERT_REGION_SIZES(RegionSizes) \
{ \
ASSERT_EQ(RegionSizes.size(), Regions->size()); \
- for (const auto [RegionIdx, Size] : enumerate(RegionSizes)) \
- ASSERT_REGION_SIZE(RegionIdx, Size); \
+ for (const auto [RegionIdx, ExpectedSize] : enumerate(RegionSizes)) \
+ ASSERT_REGION_SIZE(RegionIdx, ExpectedSize); \
}
/// Expects that register RegIdx in the rematerializer has a total of N users.
@@ -183,20 +197,20 @@ body: |
S_ENDPGM 0
...
)";
- ASSERT_TRUE(parseMIR(MIR));
- Rematerializer &Remater =
- getRematerializer(MIR, "TreeRematRollback", /*SupportRollback=*/true);
+ ASSERT_TRUE(
+ parseMIRAndInit(MIR, "TreeRematRollback", /*SupportRollback=*/true));
+ Rematerializer &Remater = getRematerializer();
Rematerializer::DependencyReuseInfo DRI;
// MBB/Region indices.
const unsigned MBB0 = 0, MBB1 = 1;
- SmallVector<unsigned, 2> RegionSizes{5, 2};
+ SmallVector<unsigned, 2> RegionSizes{5, 1};
ASSERT_REGION_SIZES(RegionSizes);
// Indices of rematerializable registers.
unsigned NumRegs = 0;
- const unsigned Cst0 = NumRegs++, Cst1 = NumRegs++, Add01 = NumRegs++,
- Cst3 = NumRegs++, Add23 = NumRegs++;
+ const RegisterIdx Cst0 = NumRegs++, Cst1 = NumRegs++, Add01 = NumRegs++,
+ Cst3 = NumRegs++, Add23 = NumRegs++;
ASSERT_EQ(Remater.getNumRegs(), NumRegs);
// Rematerialize Add23 with all transitive dependencies.
@@ -256,7 +270,7 @@ body: |
MachineInstr *NopMI = &*(*Regions)[MBB1].first;
DRI.clear().reuse(Cst0).reuse(Cst1);
- const unsigned RematAdd01 =
+ const RegisterIdx RematAdd01 =
Remater.rematerializeToPos(/*RootIdx=*/Add01, NopMI, DRI);
// This adds an additional user to the used constants, and does not change
// existing users for the original register.
@@ -266,14 +280,14 @@ body: |
EXPECT_NUM_USERS(Cst1, 2);
DRI.clear();
- const unsigned RematCst3 =
+ const RegisterIdx RematCst3 =
Remater.rematerializeToPos(/*RootIdx=*/Cst3, NopMI, DRI);
// This does not change existing users for the original register.
EXPECT_NO_USERS(RematCst3);
EXPECT_NUM_USERS(Cst3, 1);
DRI.clear().useRemat(Add01, RematAdd01).useRemat(Cst3, RematCst3);
- const unsigned RematAdd23 =
+ const RegisterIdx RematAdd23 =
Remater.rematerializeToPos(/*RootIdx=*/Add23, NopMI, DRI);
// This adds a user to used rematerializations, and does not change existing
// users for the original register.
@@ -302,6 +316,8 @@ body: |
RegionSizes[MBB0] -= 3;
ASSERT_REGION_SIZES(RegionSizes);
ASSERT_EQ(Remater.getNumRegs(), NumRegs);
+
+ EXPECT_TRUE(getMF().verify());
}
/// Rematerializes a single register to multiple regions, tracking that
@@ -329,22 +345,22 @@ body: |
S_ENDPGM 0
...
)";
- ASSERT_TRUE(parseMIR(MIR));
- Rematerializer &Remater =
- getRematerializer(MIR, "MultiRegionsRemat", /*SupportRollback=*/false);
+ ASSERT_TRUE(
+ parseMIRAndInit(MIR, "MultiRegionsRemat", /*SupportRollback=*/false));
+ Rematerializer &Remater = getRematerializer();
Rematerializer::DependencyReuseInfo DRI;
// MBB/Region indices.
const unsigned MBB0 = 0, MBB1 = 1, MBB2 = 2, MBB3 = 3;
- SmallVector<unsigned, 2> RegionSizes{1, 1, 2, 2};
+ SmallVector<unsigned, 2> RegionSizes{1, 1, 2, 1};
ASSERT_REGION_SIZES(RegionSizes);
// Indices of rematerializable registers.
- const unsigned Cst0 = 0;
+ const RegisterIdx Cst0 = 0;
ASSERT_EQ(Remater.getNumRegs(), 1U);
// Rematerialization to MBB1.
- const unsigned RematBB1 =
+ const RegisterIdx RematBB1 =
Remater.rematerializeToRegion(/*RootIdx=*/Cst0, /*UseRegion=*/MBB1, DRI);
++RegionSizes[MBB1];
ASSERT_REGION_SIZES(RegionSizes);
@@ -352,7 +368,8 @@ body: |
/*NumUsers=*/1);
// Rematerialization to MBB2.
- const unsigned RematBB2 =
+ DRI.clear();
+ const RegisterIdx RematBB2 =
Remater.rematerializeToRegion(/*RootIdx=*/Cst0, /*UseRegion=*/MBB2, DRI);
++RegionSizes[MBB2];
ASSERT_REGION_SIZES(RegionSizes);
@@ -361,7 +378,8 @@ body: |
// Rematerialization to MBB3. Rematerializing to the last original user
// deletes the original register.
- const unsigned RematBB3 =
+ DRI.clear();
+ const RegisterIdx RematBB3 =
Remater.rematerializeToRegion(/*RootIdx=*/Cst0, /*UseRegion=*/MBB3, DRI);
--RegionSizes[MBB0];
++RegionSizes[MBB3];
@@ -370,6 +388,7 @@ body: |
/*NumUsers=*/1);
Remater.updateLiveIntervals();
+ EXPECT_TRUE(getMF().verify());
}
/// Rematerializes a tree of registers with some unrematerializable operands to a
@@ -397,19 +416,18 @@ body: |
S_ENDPGM 0
...
)";
- ASSERT_TRUE(parseMIR(MIR));
- Rematerializer &Remater =
- getRematerializer(MIR, "MultiStep", /*SupportRollback=*/false);
+ ASSERT_TRUE(parseMIRAndInit(MIR, "MultiStep", /*SupportRollback=*/false));
+ Rematerializer &Remater = getRematerializer();
Rematerializer::DependencyReuseInfo DRI;
// MBB/Region indices.
const unsigned MBB0 = 0, MBB1 = 1, MBB2 = 2;
- SmallVector<unsigned, 2> RegionSizes{4, 1, 2};
+ SmallVector<unsigned, 2> RegionSizes{4, 1, 1};
ASSERT_REGION_SIZES(RegionSizes);
// Indices of rematerializable registers.
unsigned NumRegs = 0;
- const unsigned Cst0 = NumRegs++, Add01 = NumRegs++, Add22 = NumRegs++;
+ const RegisterIdx Cst0 = NumRegs++, Add01 = NumRegs++, Add22 = NumRegs++;
ASSERT_EQ(Remater.getNumRegs(), NumRegs);
// Rematerialize Add01 from the first to the second block along with its
@@ -418,7 +436,7 @@ body: |
// constant 0 remains in the first block because it has a user there, but the
// add is deleted.
Remater.rematerializeToRegion(/*RootIdx=*/Add01, /*UseRegion=*/MBB1, DRI);
- const unsigned RematCst0 = NumRegs++, RematAdd01 = NumRegs++;
+ const RegisterIdx RematCst0 = NumRegs++, RematAdd01 = NumRegs++;
RegionSizes[MBB0] -= 1;
RegionSizes[MBB1] += 2;
ASSERT_REGION_SIZES(RegionSizes);
@@ -435,9 +453,9 @@ body: |
// Rematerialize Add22 from the second to the third block, which will
// also indirectly rematerialize RematAdd01; make sure the latter's
// rematerialization's origin is the original register, not RematAdd01.
- DRI.reuse(RematCst0);
+ DRI.clear().reuse(RematCst0);
Remater.rematerializeToRegion(/*RootIdx=*/Add22, /*UseRegion=*/MBB2, DRI);
- const unsigned RematRematAdd01 = NumRegs++, RematAdd22 = NumRegs++;
+ const RegisterIdx RematRematAdd01 = NumRegs++, RematAdd22 = NumRegs++;
RegionSizes[MBB1] -= 2;
RegionSizes[MBB2] += 2;
ASSERT_REGION_SIZES(RegionSizes);
@@ -449,4 +467,126 @@ body: |
/*NumUsers=*/1);
Remater.updateLiveIntervals();
+ EXPECT_TRUE(getMF().verify());
+}
+
+/// Checks that it is possible to rematerialize inside a region that was
+/// rendered empty by previous rematerializations (as long as the region ends
+/// with a terminator).
+TEST_F(RematerializerTest, EmptyRegion) {
+ StringRef MIR = R"(
+name: EmptyRegion
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+body: |
+ bb.0:
+ %0:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode
+ %1:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
+
+ bb.1:
+ %2:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode
+
+ bb.2:
+ %3:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode
+ S_BRANCH %bb.3
+
+ bb.3:
+ S_NOP 0, implicit %0, implicit %1
+ S_NOP 0, implicit %2, implicit %3
+ S_ENDPGM 0
+...
+)";
+ ASSERT_TRUE(parseMIRAndInit(MIR, "EmptyRegion", /*SupportRollback=*/false));
+ Rematerializer &Remater = getRematerializer();
+ Rematerializer::DependencyReuseInfo DRI;
+
+ // MBB/Region indices.
+ const unsigned MBB0 = 0, MBB1 = 1, MBB2 = 2, MBB3 = 3;
+ SmallVector<unsigned, 2> RegionSizes{2, 1, 1, 2};
+ ASSERT_REGION_SIZES(RegionSizes);
+
+ // Indices of rematerializable registers.
+ unsigned NumRegs = 0;
+ const RegisterIdx Cst0 = NumRegs++, Cst1 = NumRegs++, Cst2 = NumRegs++,
+ Cst3 = NumRegs++;
+ ASSERT_EQ(Remater.getNumRegs(), NumRegs);
+
+ // After rematerializing %2 and %3 to bb.3, their respective original defining
+ // regions are empty. %2's region ends at the end of its parent block, whereas
+ // %3's region ends at a terminator MI (S_BRANCH).
+ Remater.rematerializeToRegion(/*RootIdx=*/Cst2, /*UseRegion=*/MBB3, DRI);
+ Remater.rematerializeToRegion(/*RootIdx=*/Cst3, /*UseRegion=*/MBB3, DRI);
+ RegionSizes[MBB1] -= 1;
+ RegionSizes[MBB2] -= 1;
+ RegionSizes[MBB3] += 2;
+ ASSERT_REGION_SIZES(RegionSizes);
+
+ // We can move %0 and %1 to bb.2 because the boundary of bb.2's region points
+ // to the region terminator (S_BRANCH), which is a valid position to insert
+ // before. We couldn't move them to bb.1 however, since after %2 is
+ // rematerialized there is no MI left to reference inside the region.
+ RegisterIdx RematCst0 =
+ Remater.rematerializeToPos(/*RootIdx=*/Cst0, (*Regions)[MBB2].first, DRI);
+ RegisterIdx RematCst1 =
+ Remater.rematerializeToPos(/*RootIdx=*/Cst1, (*Regions)[MBB2].first, DRI);
+ Remater.transferRegionUsers(Cst0, RematCst0, MBB3);
+ Remater.transferRegionUsers(Cst1, RematCst1, MBB3);
+ RegionSizes[MBB0] -= 2;
+ RegionSizes[MBB2] += 2;
+ ASSERT_REGION_SIZES(RegionSizes);
+
+ Remater.updateLiveIntervals();
+ EXPECT_TRUE(getMF().verify());
+}
+
+/// Checks that only registers with a single definition are rematerializable,
+/// even when registers are made up of multiple sub-registers each with their
+/// own definition.
+TEST_F(RematerializerTest, SubReg) {
+ StringRef MIR = R"(
+name: SubReg
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+body: |
+ bb.0:
+ undef %01.sub0:vreg_64_align2 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode
+ %01.sub1:vreg_64_align2 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
+
+ undef %2.sub0:vreg_64_align2 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode
+
+ undef %34.sub0:vreg_64_align2 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode
+
+ bb.1:
+ %34.sub1:vreg_64_align2 = nofpexcept V_CVT_I32_F64_e32 4, implicit $exec, implicit $mode
+ S_NOP 0, implicit %01, implicit %2, implicit %34
+ S_ENDPGM 0
+...
+)";
+ ASSERT_TRUE(parseMIRAndInit(MIR, "SubReg", /*SupportRollback=*/false));
+ Rematerializer &Remater = getRematerializer();
+ Rematerializer::DependencyReuseInfo DRI;
+
+ // MBB/Region indices.
+ const unsigned MBB0 = 0, MBB1 = 1;
+ SmallVector<unsigned, 2> RegionSizes{4, 2};
+ ASSERT_REGION_SIZES(RegionSizes);
+
+ // Indices of rematerializable registers.
+ unsigned NumRegs = 0;
+ const RegisterIdx Cst2 = NumRegs++;
+ ASSERT_EQ(Remater.getNumRegs(), NumRegs);
+
+ RegisterIdx RematCst2 =
+ Remater.rematerializeToRegion(/*RootIdx=*/Cst2, /*UseRegion=*/MBB1, DRI);
+ RegionSizes[MBB0] -= 1;
+ RegionSizes[MBB1] += 1;
+ ASSERT_REGION_SIZES(RegionSizes);
+ EXPECT_REMAT(/*RegIdx=*/RematCst2, /*OriginIdx=*/Cst2,
+ /*DefRegionIdx=*/MBB1,
+ /*NumUsers=*/1);
+
+ Remater.updateLiveIntervals();
+ EXPECT_TRUE(getMF().verify());
}
More information about the llvm-commits
mailing list