[llvm] [AMDGPU] Speed up SIRegisterInfo::getReservedRegs (PR #79610)

Sat Jan 27 03:15:43 PST 2024

================
@@ -693,20 +699,29 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
     }
   }
 
-  for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) {
-    unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i);
-    reserveRegisterTuples(Reserved, Reg);
+  for (const TargetRegisterClass *RC : regclasses()) {
+    if (RC->isBaseClass() && isVGPRClass(RC)) {
+      unsigned NumRegs = divideCeil(getRegSizeInBits(*RC), 32);
+      for (MCPhysReg Reg : *RC) {
+        unsigned Index = getHWRegIndex(Reg);
+        if (Index + NumRegs > MaxNumVGPRs)
+          Reserved.set(Reg);
+      }
+    }
   }
 
-  if (ST.hasMAIInsts()) {
-    for (unsigned i = MaxNumAGPRs; i < TotalNumVGPRs; ++i) {
-      unsigned Reg = AMDGPU::AGPR_32RegClass.getRegister(i);
-      reserveRegisterTuples(Reserved, Reg);
+  // Reserve all the AGPRs if there are no instructions to use them.
+  if (!ST.hasMAIInsts())
+    MaxNumAGPRs = 0;
+  for (const TargetRegisterClass *RC : regclasses()) {
----------------
jayfoad wrote:

True, but I don't expect that to affect the speed much since it won't change how many times the expensive inner loop is executed.

https://github.com/llvm/llvm-project/pull/79610