[llvm] [RegAllocFast] Handle single-vdef instrs faster (PR #96284)

Alexis Engelke via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 21 01:34:27 PDT 2024


https://github.com/aengelke updated https://github.com/llvm/llvm-project/pull/96284

>From b907bc97c0ff974e323eda06db0468cbd16626e7 Mon Sep 17 00:00:00 2001
From: Alexis Engelke <engelke at in.tum.de>
Date: Fri, 21 Jun 2024 09:23:07 +0200
Subject: [PATCH 1/2] [RegAllocFast] Handle single-vdef instrs faster

On x86, most instructions have tied operands, so allocateInstruction
uses the more complex assignment strategy, which first computes the
assignment order of virtual defs. This involves iterating over all
register classes (or over register aliases for physical defs) to compute
the possible number of defs per register class.

However, this information is only used for sorting virtual defs and is
therefore not required when there's only one virtual def -- which is a
very common case. As iterating over all register classes/aliases is not
cheap, do this only when there's more than one virtual def.
---
 llvm/lib/CodeGen/RegAllocFast.cpp | 29 +++++++++++++++++++----------
 1 file changed, 19 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp
index 09ce8c42a3850..d194445abbfc8 100644
--- a/llvm/lib/CodeGen/RegAllocFast.cpp
+++ b/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -1289,10 +1289,6 @@ void RegAllocFastImpl::addRegClassDefCounts(
 void RegAllocFastImpl::findAndSortDefOperandIndexes(const MachineInstr &MI) {
   DefOperandIndexes.clear();
 
-  // Track number of defs which may consume a register from the class.
-  std::vector<unsigned> RegClassDefCounts(TRI->getNumRegClasses(), 0);
-  assert(RegClassDefCounts[0] == 0);
-
   LLVM_DEBUG(dbgs() << "Need to assign livethroughs\n");
   for (unsigned I = 0, E = MI.getNumOperands(); I < E; ++I) {
     const MachineOperand &MO = MI.getOperand(I);
@@ -1306,14 +1302,27 @@ void RegAllocFastImpl::findAndSortDefOperandIndexes(const MachineInstr &MI) {
       }
     }
 
-    if (MO.isDef()) {
-      if (Reg.isVirtual() && shouldAllocateRegister(Reg))
-        DefOperandIndexes.push_back(I);
-
-      addRegClassDefCounts(RegClassDefCounts, Reg);
-    }
+    if (MO.isDef() && Reg.isVirtual() && shouldAllocateRegister(Reg))
+      DefOperandIndexes.push_back(I);
   }
 
+  // Most instructions only have one virtual def, so there's no point in
+  // computing the possible number of defs for every register class.
+  if (DefOperandIndexes.size() <= 1)
+    return;
+
+  // Track number of defs which may consume a register from the class. This is
+  // used to assign registers for possibly-too-small classes first. Example:
+  // defs are eax, 3 * gr32_abcd, 2 * gr32 => we want to assign the gr32_abcd
+  // registers first so that the gr32 don't use the gr32_abcd registers before
+  // we assign these.
+  std::vector<unsigned> RegClassDefCounts(TRI->getNumRegClasses(), 0);
+  assert(RegClassDefCounts[0] == 0);
+
+  for (const MachineOperand &MO : MI.operands())
+    if (MO.isReg() && MO.isDef())
+      addRegClassDefCounts(RegClassDefCounts, MO.getReg());
+
   llvm::sort(DefOperandIndexes, [&](uint16_t I0, uint16_t I1) {
     const MachineOperand &MO0 = MI.getOperand(I0);
     const MachineOperand &MO1 = MI.getOperand(I1);

>From acfe09eae0aa029b6bdfe171c33a19ce8ce9bbe9 Mon Sep 17 00:00:00 2001
From: Alexis Engelke <engelke at in.tum.de>
Date: Fri, 21 Jun 2024 08:30:03 +0000
Subject: [PATCH 2/2] SmallVector and uint16_t -> unsigned

---
 llvm/lib/CodeGen/RegAllocFast.cpp | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp
index d194445abbfc8..0348d7a4a6b9b 100644
--- a/llvm/lib/CodeGen/RegAllocFast.cpp
+++ b/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -258,7 +258,7 @@ class RegAllocFastImpl {
   /// cannot be allocated.
   RegUnitSet UsedInInstr;
   RegUnitSet PhysRegUses;
-  SmallVector<uint16_t, 8> DefOperandIndexes;
+  SmallVector<unsigned, 8> DefOperandIndexes;
   // Register masks attached to the current instruction.
   SmallVector<const uint32_t *> RegMasks;
 
@@ -322,7 +322,7 @@ class RegAllocFastImpl {
 private:
   void allocateBasicBlock(MachineBasicBlock &MBB);
 
-  void addRegClassDefCounts(std::vector<unsigned> &RegClassDefCounts,
+  void addRegClassDefCounts(MutableArrayRef<unsigned> RegClassDefCounts,
                             Register Reg) const;
 
   void findAndSortDefOperandIndexes(const MachineInstr &MI);
@@ -1253,7 +1253,7 @@ void RegAllocFastImpl::dumpState() const {
 
 /// Count number of defs consumed from each register class by \p Reg
 void RegAllocFastImpl::addRegClassDefCounts(
-    std::vector<unsigned> &RegClassDefCounts, Register Reg) const {
+    MutableArrayRef<unsigned> RegClassDefCounts, Register Reg) const {
   assert(RegClassDefCounts.size() == TRI->getNumRegClasses());
 
   if (Reg.isVirtual()) {
@@ -1316,14 +1316,14 @@ void RegAllocFastImpl::findAndSortDefOperandIndexes(const MachineInstr &MI) {
   // defs are eax, 3 * gr32_abcd, 2 * gr32 => we want to assign the gr32_abcd
   // registers first so that the gr32 don't use the gr32_abcd registers before
   // we assign these.
-  std::vector<unsigned> RegClassDefCounts(TRI->getNumRegClasses(), 0);
+  SmallVector<unsigned> RegClassDefCounts(TRI->getNumRegClasses(), 0);
   assert(RegClassDefCounts[0] == 0);
 
   for (const MachineOperand &MO : MI.operands())
     if (MO.isReg() && MO.isDef())
       addRegClassDefCounts(RegClassDefCounts, MO.getReg());
 
-  llvm::sort(DefOperandIndexes, [&](uint16_t I0, uint16_t I1) {
+  llvm::sort(DefOperandIndexes, [&](unsigned I0, unsigned I1) {
     const MachineOperand &MO0 = MI.getOperand(I0);
     const MachineOperand &MO1 = MI.getOperand(I1);
     Register Reg0 = MO0.getReg();
@@ -1448,7 +1448,7 @@ void RegAllocFastImpl::allocateInstruction(MachineInstr &MI) {
         while (ReArrangedImplicitOps) {
           ReArrangedImplicitOps = false;
           findAndSortDefOperandIndexes(MI);
-          for (uint16_t OpIdx : DefOperandIndexes) {
+          for (unsigned OpIdx : DefOperandIndexes) {
             MachineOperand &MO = MI.getOperand(OpIdx);
             LLVM_DEBUG(dbgs() << "Allocating " << MO << '\n');
             Register Reg = MO.getReg();



More information about the llvm-commits mailing list