[llvm] 7714e03 - RegAllocGreedy: Allow last chance recolor to retry overlapping tuples

Mon Apr 25 14:07:23 PDT 2022

Author: Matt Arsenault
Date: 2022-04-25T17:07:17-04:00
New Revision: 7714e0317520207572168388f22012dd9e152e9e

URL: https://github.com/llvm/llvm-project/commit/7714e0317520207572168388f22012dd9e152e9e
DIFF: https://github.com/llvm/llvm-project/commit/7714e0317520207572168388f22012dd9e152e9e.diff

LOG: RegAllocGreedy: Allow last chance recolor to retry overlapping tuples

Last chance recoloring didn't try recoloring a done register with the
same class since it believed there was no point. This doesn't
necessarily apply if the members in that class overlap. Allow the
recoloring to proceed if the assigned interfering physical register
overlaps with the candidate register.

This avoids an allocation failure with overlapping tuples. This
testcase could be handled better, and I don't believe should reach
last chance recoloring. The failure only manifests with the mutually
unsatisfiable register hints to overlapping tuples. The earlier
assignment decisions probably should have figured out that using these
hints was a bad idea.

Added: 
    llvm/test/CodeGen/AMDGPU/regalloc-fail-unsatisfiable-overlapping-tuple-hints.mir

Modified: 
    llvm/lib/CodeGen/RegAllocGreedy.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp
index 029f05d0b5072..a6fc9b1ee011c 100644

--- a/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -1833,6 +1833,18 @@ static bool hasTiedDef(MachineRegisterInfo *MRI, unsigned reg) {
   return false;
 }
 
+/// Return true if the existing assignment of \p Intf overlaps, but is not the
+/// same, as \p PhysReg.
+static bool assignedRegPartiallyOverlaps(const TargetRegisterInfo &TRI,
+                                         const VirtRegMap &VRM,
+                                         MCRegister PhysReg,
+                                         const LiveInterval &Intf) {
+  MCRegister AssignedReg = VRM.getPhys(Intf.reg());
+  if (PhysReg == AssignedReg)
+    return false;
+  return TRI.regsOverlap(PhysReg, AssignedReg);
+}
+
 /// mayRecolorAllInterferences - Check if the virtual registers that
 /// interfere with \p VirtReg on \p PhysReg (or one of its aliases) may be
 /// recolored to free \p PhysReg.
@@ -1858,12 +1870,20 @@ bool RAGreedy::mayRecolorAllInterferences(
       return false;
     }
     for (const LiveInterval *Intf : reverse(Q.interferingVRegs())) {
-      // If Intf is done and sit on the same register class as VirtReg,
-      // it would not be recolorable as it is in the same state as VirtReg.
-      // However, if VirtReg has tied defs and Intf doesn't, then
+      // If Intf is done and sits on the same register class as VirtReg, it
+      // would not be recolorable as it is in the same state as
+      // VirtReg. However there are at least two exceptions.
+      //
+      // If VirtReg has tied defs and Intf doesn't, then
       // there is still a point in examining if it can be recolorable.
+      //
+      // Additionally, if the register class has overlapping tuple members, it
+      // may still be recolorable using a 
diff erent tuple. This is more likely
+      // if the existing assignment aliases with the candidate.
+      //
       if (((ExtraInfo->getStage(*Intf) == RS_Done &&
-            MRI->getRegClass(Intf->reg()) == CurRC) &&
+            MRI->getRegClass(Intf->reg()) == CurRC &&
+            !assignedRegPartiallyOverlaps(*TRI, *VRM, PhysReg, *Intf)) &&
            !(hasTiedDef(MRI, VirtReg.reg()) &&
              !hasTiedDef(MRI, Intf->reg()))) ||
           FixedRegisters.count(Intf->reg())) {

diff  --git a/llvm/test/CodeGen/AMDGPU/regalloc-fail-unsatisfiable-overlapping-tuple-hints.mir b/llvm/test/CodeGen/AMDGPU/regalloc-fail-unsatisfiable-overlapping-tuple-hints.mir
new file mode 100644
index 0000000000000..09be927dc952e
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/regalloc-fail-unsatisfiable-overlapping-tuple-hints.mir
@@ -0,0 +1,84 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=greedy -o - %s | FileCheck %s
+
+# This testcase is restricted to use a maximum of 24 VGPRs. It is
+# therefore possible to allocate a maximum of 3 vreg_256s at a
+# time. The apparent number of registers in the class is larger, but
+# each one overlaps with the next. Allocating a vreg_64 will prevent a
+# full vreg_256 from being live at a given point.
+
+# The hints are trying to force allocation of overlapping vreg_256s
+# which cannot be satisfied. The last S_NOP in %bb.0 with 2 vreg_256s
+# and a vreg_64 use can be satisfied as long as the hints are ignored.
+
+# With the resulting allocation order, this ends up using last chance
+# recoloring for a vreg_256. We should try to recolor for completed
+# virtual registers with the same class, since the existing assignment
+# can only be corrected by adjusting to a non-overlapping register.
+
+--- |
+  define void @recolor_impossible_hint() #0 {
+    ret void
+  }
+
+  attributes #0 = { "amdgpu-waves-per-eu"="10,10" }
+---
+
+---
+name:            recolor_impossible_hint
+alignment:       1
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: vreg_256, preferred-register: '$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7' }
+  - { id: 1, class: vreg_256, preferred-register: '$vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8' }
+  - { id: 2, class: vreg_256, preferred-register: '$vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9' }
+  - { id: 3, class: vreg_256, preferred-register: '$vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10' }
+machineFunctionInfo:
+  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
+  stackPtrOffsetReg: '$sgpr32'
+  occupancy:       10
+body:             |
+  ; CHECK-LABEL: name: recolor_impossible_hint
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_NOP 0, implicit-def %7, implicit-def %19, implicit-def %5
+  ; CHECK-NEXT:   SI_SPILL_V256_SAVE %19, %stack.3, $sgpr32, 0, implicit $exec :: (store (s256) into %stack.3, align 4, addrspace 5)
+  ; CHECK-NEXT:   SI_SPILL_V256_SAVE %7, %stack.1, $sgpr32, 0, implicit $exec :: (store (s256) into %stack.1, align 4, addrspace 5)
+  ; CHECK-NEXT:   SI_SPILL_V256_SAVE %5, %stack.0, $sgpr32, 0, implicit $exec :: (store (s256) into %stack.0, align 4, addrspace 5)
+  ; CHECK-NEXT:   S_NOP 0, implicit-def %17
+  ; CHECK-NEXT:   SI_SPILL_V256_SAVE %17, %stack.2, $sgpr32, 0, implicit $exec :: (store (s256) into %stack.2, align 4, addrspace 5)
+  ; CHECK-NEXT:   S_NOP 0, implicit-def %4
+  ; CHECK-NEXT:   [[SI_SPILL_V256_RESTORE:%[0-9]+]]:vreg_256 = SI_SPILL_V256_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s256) from %stack.1, align 4, addrspace 5)
+  ; CHECK-NEXT:   [[SI_SPILL_V256_RESTORE1:%[0-9]+]]:vreg_256 = SI_SPILL_V256_RESTORE %stack.3, $sgpr32, 0, implicit $exec :: (load (s256) from %stack.3, align 4, addrspace 5)
+  ; CHECK-NEXT:   S_NOP 0, implicit [[SI_SPILL_V256_RESTORE]], implicit [[SI_SPILL_V256_RESTORE1]], implicit %4
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vreg_256 = COPY [[SI_SPILL_V256_RESTORE1]]
+  ; CHECK-NEXT:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_NOP 0, implicit [[COPY]]
+  ; CHECK-NEXT:   [[SI_SPILL_V256_RESTORE2:%[0-9]+]]:vreg_256 = SI_SPILL_V256_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s256) from %stack.0, align 4, addrspace 5)
+  ; CHECK-NEXT:   S_NOP 0, implicit [[SI_SPILL_V256_RESTORE2]]
+  ; CHECK-NEXT:   [[SI_SPILL_V256_RESTORE3:%[0-9]+]]:vreg_256 = SI_SPILL_V256_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s256) from %stack.2, align 4, addrspace 5)
+  ; CHECK-NEXT:   S_NOP 0, implicit [[SI_SPILL_V256_RESTORE3]]
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   S_ENDPGM 0
+  bb.0:
+    S_NOP 0, implicit-def %0:vreg_256, implicit-def %1:vreg_256, implicit-def %2:vreg_256
+    S_NOP 0, implicit-def %3:vreg_256
+    S_NOP 0, implicit-def %4:vreg_64
+    S_NOP 0, implicit %0, implicit %1, implicit %4
+    S_CBRANCH_EXECNZ %bb.3, implicit $exec
+
+  bb.2:
+    S_NOP 0, implicit %1
+    S_NOP 0, implicit %2
+    S_NOP 0, implicit %3
+
+  bb.3:
+    S_ENDPGM 0
+
+...