[llvm] [RegAllocFast] Replace UsedInInstr with vector (PR #96323)

Alexis Engelke via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 21 08:58:49 PDT 2024


https://github.com/aengelke created https://github.com/llvm/llvm-project/pull/96323

A SparseSet adds an avoidable layer of indirection and possibly looping control flow. Avoid this overhead by using a vector to store UsedInInstr and PhysRegUses.

To avoid clearing the vector after every instruction, use a monotonically increasing counter. The two sets are now merged into one vector: an entry's lowest bit is set for a normal use and clear for a use that is relevant for the livethrough handling code only.

---

http://llvm-compile-time-tracker.com/compare.php?from=739a9605677dd736971b17a7888f9d18fd245904&to=86e8efec8f9e09e7ed0aaaf47d5bea3fe793b1f6&stat=instructions:u

Not really related: iterating over regunits isn't fast. I assume (but haven't verified) that this comes from data dependencies in the diff list storage. I think a more efficient way to map regs to reg units, maybe even by reordering enum values, would improve performance. (It's not really slow either, but it happens extremely often and is therefore noticeable.)

>From 86e8efec8f9e09e7ed0aaaf47d5bea3fe793b1f6 Mon Sep 17 00:00:00 2001
From: Alexis Engelke <engelke at in.tum.de>
Date: Fri, 21 Jun 2024 15:54:09 +0200
Subject: [PATCH] [RegAllocFast] Replace UsedInInstr with vector

A SparseSet adds an avoidable layer of indirection and possibly looping
control flow. Avoid this overhead by using a vector instead to store
UsedInInstrs and PhysRegUses.

To avoid clearing the vector after every instruction, use a
monotonically increasing counter. The two maps are now merged and the
lowest bit indicates whether the use is relevant for the livethrough
handling code only.
---
 llvm/lib/CodeGen/RegAllocFast.cpp | 48 ++++++++++++++++++-------------
 1 file changed, 28 insertions(+), 20 deletions(-)

diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp
index fa9b7576bf9ed..3a29a703a88a3 100644
--- a/llvm/lib/CodeGen/RegAllocFast.cpp
+++ b/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -253,11 +253,19 @@ class RegAllocFastImpl {
 
   SmallVector<MachineInstr *, 32> Coalesced;
 
-  using RegUnitSet = SparseSet<uint16_t, identity<uint16_t>>;
-  /// Set of register units that are used in the current instruction, and so
+  /// Track register units that are used in the current instruction, and so
   /// cannot be allocated.
-  RegUnitSet UsedInInstr;
-  RegUnitSet PhysRegUses;
+  ///
+  /// In the first phase (tied defs/early clobber), we consider also physical
+  /// uses, afterwards, we don't. If the lowest bit isn't set, it's a solely
+  /// physical use (markPhysRegUsedInInstr), otherwise, it's a normal use. To
+  /// avoid resetting the entire vector after every instruction, we track the
+  /// instruction "generation" in the remaining 31 bits -- this means that if
+  /// UsedInInstr[Idx] < InstrGen, the register unit is unused. InstrGen is
+  /// never zero and always incremented by two.
+  uint32_t InstrGen;
+  SmallVector<unsigned, 0> UsedInInstr;
+
   SmallVector<unsigned, 8> DefOperandIndexes;
   // Register masks attached to the current instruction.
   SmallVector<const uint32_t *> RegMasks;
@@ -271,7 +279,7 @@ class RegAllocFastImpl {
   /// Mark a physreg as used in this instruction.
   void markRegUsedInInstr(MCPhysReg PhysReg) {
     for (MCRegUnit Unit : TRI->regunits(PhysReg))
-      UsedInInstr.insert(Unit);
+      UsedInInstr[Unit] = InstrGen | 1;
   }
 
   // Check if physreg is clobbered by instruction's regmask(s).
@@ -285,26 +293,25 @@ class RegAllocFastImpl {
   bool isRegUsedInInstr(MCPhysReg PhysReg, bool LookAtPhysRegUses) const {
     if (LookAtPhysRegUses && isClobberedByRegMasks(PhysReg))
       return true;
-    for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
-      if (UsedInInstr.count(Unit))
-        return true;
-      if (LookAtPhysRegUses && PhysRegUses.count(Unit))
+    for (MCRegUnit Unit : TRI->regunits(PhysReg))
+      if (UsedInInstr[Unit] >= (InstrGen | !LookAtPhysRegUses))
         return true;
-    }
     return false;
   }
 
   /// Mark physical register as being used in a register use operand.
   /// This is only used by the special livethrough handling code.
   void markPhysRegUsedInInstr(MCPhysReg PhysReg) {
-    for (MCRegUnit Unit : TRI->regunits(PhysReg))
-      PhysRegUses.insert(Unit);
+    for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
+      assert(UsedInInstr[Unit] <= InstrGen && "non-phys use before phys use?");
+      UsedInInstr[Unit] = InstrGen;
+    }
   }
 
   /// Remove mark of physical register being used in the instruction.
   void unmarkRegUsedInInstr(MCPhysReg PhysReg) {
     for (MCRegUnit Unit : TRI->regunits(PhysReg))
-      UsedInInstr.erase(Unit);
+      UsedInInstr[Unit] = 0;
   }
 
   enum : unsigned {
@@ -1382,7 +1389,12 @@ void RegAllocFastImpl::allocateInstruction(MachineInstr &MI) {
   // - The "free def operands" step has to come last instead of first for tied
   //   operands and early-clobbers.
 
-  UsedInInstr.clear();
+  InstrGen += 2;
+  // In the event we ever get more than 2**31 instructions...
+  if (InstrGen == 0) {
+    UsedInInstr.assign(UsedInInstr.size(), 0);
+    InstrGen = 2;
+  }
   RegMasks.clear();
   BundleVirtRegsMap.clear();
 
@@ -1443,8 +1455,6 @@ void RegAllocFastImpl::allocateInstruction(MachineInstr &MI) {
       //   heuristic to figure out a good operand order before doing
       //   assignments.
       if (NeedToAssignLiveThroughs) {
-        PhysRegUses.clear();
-
         while (ReArrangedImplicitOps) {
           ReArrangedImplicitOps = false;
           findAndSortDefOperandIndexes(MI);
@@ -1769,10 +1779,8 @@ bool RegAllocFastImpl::runOnMachineFunction(MachineFunction &MF) {
   MRI->freezeReservedRegs();
   RegClassInfo.runOnMachineFunction(MF);
   unsigned NumRegUnits = TRI->getNumRegUnits();
-  UsedInInstr.clear();
-  UsedInInstr.setUniverse(NumRegUnits);
-  PhysRegUses.clear();
-  PhysRegUses.setUniverse(NumRegUnits);
+  InstrGen = 0;
+  UsedInInstr.assign(NumRegUnits, 0);
 
   // initialize the virtual->physical register map to have a 'null'
   // mapping for all virtual registers



More information about the llvm-commits mailing list