[llvm] [RegAllocFast] Replace UsedInInstr with vector (PR #96323)

Alexis Engelke via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 21 10:27:13 PDT 2024


https://github.com/aengelke updated https://github.com/llvm/llvm-project/pull/96323

>From 86e8efec8f9e09e7ed0aaaf47d5bea3fe793b1f6 Mon Sep 17 00:00:00 2001
From: Alexis Engelke <engelke at in.tum.de>
Date: Fri, 21 Jun 2024 15:54:09 +0200
Subject: [PATCH 1/2] [RegAllocFast] Replace UsedInInstr with vector

A SparseSet adds an avoidable layer of indirection and possibly looping
control flow. Avoid this overhead by using a vector instead to store
UsedInInstr and PhysRegUses.

To avoid clearing the vector after every instruction, use a
monotonically increasing counter. The two sets are now merged into a
single vector; a cleared lowest bit marks a use that is relevant only
for the livethrough handling code.
---
 llvm/lib/CodeGen/RegAllocFast.cpp | 48 ++++++++++++++++++-------------
 1 file changed, 28 insertions(+), 20 deletions(-)

diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp
index fa9b7576bf9ed..3a29a703a88a3 100644
--- a/llvm/lib/CodeGen/RegAllocFast.cpp
+++ b/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -253,11 +253,19 @@ class RegAllocFastImpl {
 
   SmallVector<MachineInstr *, 32> Coalesced;
 
-  using RegUnitSet = SparseSet<uint16_t, identity<uint16_t>>;
-  /// Set of register units that are used in the current instruction, and so
+  /// Track register units that are used in the current instruction, and so
   /// cannot be allocated.
-  RegUnitSet UsedInInstr;
-  RegUnitSet PhysRegUses;
+  ///
+  /// In the first phase (tied defs/early clobber), we consider also physical
+  /// uses, afterwards, we don't. if the lowest bit isn't set, it's a solely
+  /// physical use (markPhysRegUsedInInstr), otherwise, it's a normal use. To
+  /// avoid resetting the entire vector after every instruction, we track the
+  /// instruction "generation" in the remaining 31 bits -- this meands, that if
+  /// UsedInInstr[Idx] < InstrGen, the register unit is unused. InstrGen is
+  /// never zero and always incremented by two.
+  uint32_t InstrGen;
+  SmallVector<unsigned, 0> UsedInInstr;
+
   SmallVector<unsigned, 8> DefOperandIndexes;
   // Register masks attached to the current instruction.
   SmallVector<const uint32_t *> RegMasks;
@@ -271,7 +279,7 @@ class RegAllocFastImpl {
   /// Mark a physreg as used in this instruction.
   void markRegUsedInInstr(MCPhysReg PhysReg) {
     for (MCRegUnit Unit : TRI->regunits(PhysReg))
-      UsedInInstr.insert(Unit);
+      UsedInInstr[Unit] = InstrGen | 1;
   }
 
   // Check if physreg is clobbered by instruction's regmask(s).
@@ -285,26 +293,25 @@ class RegAllocFastImpl {
   bool isRegUsedInInstr(MCPhysReg PhysReg, bool LookAtPhysRegUses) const {
     if (LookAtPhysRegUses && isClobberedByRegMasks(PhysReg))
       return true;
-    for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
-      if (UsedInInstr.count(Unit))
-        return true;
-      if (LookAtPhysRegUses && PhysRegUses.count(Unit))
+    for (MCRegUnit Unit : TRI->regunits(PhysReg))
+      if (UsedInInstr[Unit] >= (InstrGen | !LookAtPhysRegUses))
         return true;
-    }
     return false;
   }
 
   /// Mark physical register as being used in a register use operand.
   /// This is only used by the special livethrough handling code.
   void markPhysRegUsedInInstr(MCPhysReg PhysReg) {
-    for (MCRegUnit Unit : TRI->regunits(PhysReg))
-      PhysRegUses.insert(Unit);
+    for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
+      assert(UsedInInstr[Unit] <= InstrGen && "non-phys use before phys use?");
+      UsedInInstr[Unit] = InstrGen;
+    }
   }
 
   /// Remove mark of physical register being used in the instruction.
   void unmarkRegUsedInInstr(MCPhysReg PhysReg) {
     for (MCRegUnit Unit : TRI->regunits(PhysReg))
-      UsedInInstr.erase(Unit);
+      UsedInInstr[Unit] = 0;
   }
 
   enum : unsigned {
@@ -1382,7 +1389,12 @@ void RegAllocFastImpl::allocateInstruction(MachineInstr &MI) {
   // - The "free def operands" step has to come last instead of first for tied
   //   operands and early-clobbers.
 
-  UsedInInstr.clear();
+  InstrGen += 2;
+  // In the event we ever get more than 2**31 instructions...
+  if (InstrGen == 0) {
+    UsedInInstr.assign(UsedInInstr.size(), 0);
+    InstrGen = 2;
+  }
   RegMasks.clear();
   BundleVirtRegsMap.clear();
 
@@ -1443,8 +1455,6 @@ void RegAllocFastImpl::allocateInstruction(MachineInstr &MI) {
       //   heuristic to figure out a good operand order before doing
       //   assignments.
       if (NeedToAssignLiveThroughs) {
-        PhysRegUses.clear();
-
         while (ReArrangedImplicitOps) {
           ReArrangedImplicitOps = false;
           findAndSortDefOperandIndexes(MI);
@@ -1769,10 +1779,8 @@ bool RegAllocFastImpl::runOnMachineFunction(MachineFunction &MF) {
   MRI->freezeReservedRegs();
   RegClassInfo.runOnMachineFunction(MF);
   unsigned NumRegUnits = TRI->getNumRegUnits();
-  UsedInInstr.clear();
-  UsedInInstr.setUniverse(NumRegUnits);
-  PhysRegUses.clear();
-  PhysRegUses.setUniverse(NumRegUnits);
+  InstrGen = 0;
+  UsedInInstr.assign(NumRegUnits, 0);
 
   // initialize the virtual->physical register map to have a 'null'
   // mapping for all virtual registers

>From 950e0c9b7cdb966b97b3e5f290117917cce75e5c Mon Sep 17 00:00:00 2001
From: Alexis Engelke <engelke at in.tum.de>
Date: Fri, 21 Jun 2024 17:25:50 +0000
Subject: [PATCH 2/2] Fix comment + add LLVM_UNLIKELY

---
 llvm/lib/CodeGen/RegAllocFast.cpp | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp
index 3a29a703a88a3..f68f67fefa92c 100644
--- a/llvm/lib/CodeGen/RegAllocFast.cpp
+++ b/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -257,12 +257,15 @@ class RegAllocFastImpl {
   /// cannot be allocated.
   ///
   /// In the first phase (tied defs/early clobber), we consider also physical
-  /// uses, afterwards, we don't. if the lowest bit isn't set, it's a solely
+  /// uses, afterwards, we don't. If the lowest bit isn't set, it's a solely
   /// physical use (markPhysRegUsedInInstr), otherwise, it's a normal use. To
   /// avoid resetting the entire vector after every instruction, we track the
-  /// instruction "generation" in the remaining 31 bits -- this meands, that if
+  /// instruction "generation" in the remaining 31 bits -- this means, that if
   /// UsedInInstr[Idx] < InstrGen, the register unit is unused. InstrGen is
   /// never zero and always incremented by two.
+  ///
+  /// Don't allocate inline storage: the number of register units is typically
+  /// quite large (e.g., AArch64 > 100, X86 > 200, AMDGPU > 1000).
   uint32_t InstrGen;
   SmallVector<unsigned, 0> UsedInInstr;
 
@@ -1391,7 +1394,7 @@ void RegAllocFastImpl::allocateInstruction(MachineInstr &MI) {
 
   InstrGen += 2;
   // In the event we ever get more than 2**31 instructions...
-  if (InstrGen == 0) {
+  if (LLVM_UNLIKELY(InstrGen == 0)) {
     UsedInInstr.assign(UsedInInstr.size(), 0);
     InstrGen = 2;
   }



More information about the llvm-commits mailing list