[llvm] r343004 - [RegAllocGreedy] avoid using physreg candidates that cannot be correctly spilled

Daniil Fukalov via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 25 11:37:38 PDT 2018


Author: dfukalov
Date: Tue Sep 25 11:37:38 2018
New Revision: 343004

URL: http://llvm.org/viewvc/llvm-project?rev=343004&view=rev
Log:
[RegAllocGreedy] avoid using physreg candidates that cannot be correctly spilled

For the AMDGPU target if a MBB contains exec mask restore preamble, SplitEditor may get state when it cannot insert a spill instruction.

E.g. for a MIR

bb.100:
    %1 = S_OR_SAVEEXEC_B64 %2, implicit-def $exec, implicit-def $scc, implicit $exec
and if the regalloc will try to allocate a virtreg to the physreg already assigned to virtreg %1, it should insert spill instruction before the S_OR_SAVEEXEC_B64 instruction.
But it is not possible since can generate incorrect code in terms of exec mask.

The change makes regalloc to ignore such physreg candidates.

Reviewed By: rampitec

Differential Revision: https://reviews.llvm.org/D52052

Added:
    llvm/trunk/test/CodeGen/AMDGPU/spill-before-exec.mir
Modified:
    llvm/trunk/lib/CodeGen/RegAllocGreedy.cpp
    llvm/trunk/lib/CodeGen/SplitKit.h

Modified: llvm/trunk/lib/CodeGen/RegAllocGreedy.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegAllocGreedy.cpp?rev=343004&r1=343003&r2=343004&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/RegAllocGreedy.cpp (original)
+++ llvm/trunk/lib/CodeGen/RegAllocGreedy.cpp Tue Sep 25 11:37:38 2018
@@ -449,8 +449,8 @@ private:
 
   BlockFrequency calcSpillCost();
   bool addSplitConstraints(InterferenceCache::Cursor, BlockFrequency&);
-  void addThroughConstraints(InterferenceCache::Cursor, ArrayRef<unsigned>);
-  void growRegion(GlobalSplitCandidate &Cand);
+  bool addThroughConstraints(InterferenceCache::Cursor, ArrayRef<unsigned>);
+  bool growRegion(GlobalSplitCandidate &Cand);
   bool splitCanCauseEvictionChain(unsigned Evictee, GlobalSplitCandidate &Cand,
                                   unsigned BBNumber,
                                   const AllocationOrder &Order);
@@ -1203,6 +1203,13 @@ bool RAGreedy::addSplitConstraints(Inter
       } else if (Intf.first() < BI.LastInstr) {
         ++Ins;
       }
+
+      // Abort if the spill cannot be inserted at the MBB' start
+      if (((BC.Entry == SpillPlacement::MustSpill) ||
+           (BC.Entry == SpillPlacement::PrefSpill)) &&
+          SlotIndex::isEarlierInstr(BI.FirstInstr,
+                                    SA->getFirstSplitPoint(BC.Number)))
+        return false;
     }
 
     // Interference for the live-out value.
@@ -1232,7 +1239,7 @@ bool RAGreedy::addSplitConstraints(Inter
 
 /// addThroughConstraints - Add constraints and links to SpillPlacer from the
 /// live-through blocks in Blocks.
-void RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf,
+bool RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf,
                                      ArrayRef<unsigned> Blocks) {
   const unsigned GroupSize = 8;
   SpillPlacement::BlockConstraint BCS[GroupSize];
@@ -1256,6 +1263,12 @@ void RAGreedy::addThroughConstraints(Int
     assert(B < GroupSize && "Array overflow");
     BCS[B].Number = Number;
 
+    // Abort if the spill cannot be inserted at the MBB' start
+    MachineBasicBlock *MBB = MF->getBlockNumbered(Number);
+    if (!MBB->empty() &&
+        SlotIndex::isEarlierInstr(LIS->getInstructionIndex(MBB->instr_front()),
+                                  SA->getFirstSplitPoint(Number)))
+      return false;
     // Interference for the live-in value.
     if (Intf.first() <= Indexes->getMBBStartIdx(Number))
       BCS[B].Entry = SpillPlacement::MustSpill;
@@ -1276,9 +1289,10 @@ void RAGreedy::addThroughConstraints(Int
 
   SpillPlacer->addConstraints(makeArrayRef(BCS, B));
   SpillPlacer->addLinks(makeArrayRef(TBS, T));
+  return true;
 }
 
-void RAGreedy::growRegion(GlobalSplitCandidate &Cand) {
+bool RAGreedy::growRegion(GlobalSplitCandidate &Cand) {
   // Keep track of through blocks that have not been added to SpillPlacer.
   BitVector Todo = SA->getThroughBlocks();
   SmallVectorImpl<unsigned> &ActiveBlocks = Cand.ActiveBlocks;
@@ -1314,9 +1328,10 @@ void RAGreedy::growRegion(GlobalSplitCan
     // Compute through constraints from the interference, or assume that all
     // through blocks prefer spilling when forming compact regions.
     auto NewBlocks = makeArrayRef(ActiveBlocks).slice(AddedTo);
-    if (Cand.PhysReg)
-      addThroughConstraints(Cand.Intf, NewBlocks);
-    else
+    if (Cand.PhysReg) {
+      if (!addThroughConstraints(Cand.Intf, NewBlocks))
+        return false;
+    } else
       // Provide a strong negative bias on through blocks to prevent unwanted
       // liveness on loop backedges.
       SpillPlacer->addPrefSpill(NewBlocks, /* Strong= */ true);
@@ -1326,6 +1341,7 @@ void RAGreedy::growRegion(GlobalSplitCan
     SpillPlacer->iterate();
   }
   LLVM_DEBUG(dbgs() << ", v=" << Visited);
+  return true;
 }
 
 /// calcCompactRegion - Compute the set of edge bundles that should be live
@@ -1356,7 +1372,11 @@ bool RAGreedy::calcCompactRegion(GlobalS
     return false;
   }
 
-  growRegion(Cand);
+  if (!growRegion(Cand)) {
+    LLVM_DEBUG(dbgs() << ", cannot spill all interferences.\n");
+    return false;
+  }
+
   SpillPlacer->finish();
 
   if (!Cand.LiveBundles.any()) {
@@ -1886,7 +1906,10 @@ unsigned RAGreedy::calculateRegionSplitC
       });
       continue;
     }
-    growRegion(Cand);
+    if (!growRegion(Cand)) {
+      LLVM_DEBUG(dbgs() << ", cannot spill all interferences.\n");
+      continue;
+    }
 
     SpillPlacer->finish();
 

Modified: llvm/trunk/lib/CodeGen/SplitKit.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SplitKit.h?rev=343004&r1=343003&r2=343004&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SplitKit.h (original)
+++ llvm/trunk/lib/CodeGen/SplitKit.h Tue Sep 25 11:37:38 2018
@@ -25,6 +25,7 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervals.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/SlotIndexes.h"
@@ -76,6 +77,18 @@ public:
   /// Returns the last insert point as an iterator for \pCurLI in \pMBB.
   MachineBasicBlock::iterator getLastInsertPointIter(const LiveInterval &CurLI,
                                                      MachineBasicBlock &MBB);
+
+  /// Return the base index of the first insert point in \pMBB.
+  SlotIndex getFirstInsertPoint(MachineBasicBlock &MBB) {
+    SlotIndex Res = LIS.getMBBStartIdx(&MBB);
+    if (!MBB.empty()) {
+      MachineBasicBlock::iterator MII = MBB.SkipPHIsLabelsAndDebug(MBB.begin());
+      if (MII != MBB.end())
+        Res = LIS.getInstructionIndex(*MII);
+    }
+    return Res;
+  }
+
 };
 
 /// SplitAnalysis - Analyze a LiveInterval, looking for live range splitting
@@ -225,6 +238,10 @@ public:
   MachineBasicBlock::iterator getLastSplitPointIter(MachineBasicBlock *BB) {
     return IPA.getLastInsertPointIter(*CurLI, *BB);
   }
+
+  SlotIndex getFirstSplitPoint(unsigned Num) {
+    return IPA.getFirstInsertPoint(*MF.getBlockNumbered(Num));
+  }
 };
 
 /// SplitEditor - Edit machine code and LiveIntervals for live range

Added: llvm/trunk/test/CodeGen/AMDGPU/spill-before-exec.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/spill-before-exec.mir?rev=343004&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/spill-before-exec.mir (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/spill-before-exec.mir Tue Sep 25 11:37:38 2018
@@ -0,0 +1,100 @@
+# REQUIRES: asserts
+# RUN: llc -mtriple=amdgcn--- -verify-machineinstrs -debug-only=regalloc -run-pass=greedy -o /dev/null %s 2>&1 | FileCheck %s
+
+---
+# Check that physreg candidate is not used since cannot be spilled in a block,
+# e.g. before exec mask preamble
+# CHECK: , cannot spill all interferences.
+
+name:            foo
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: sreg_64 }
+  - { id: 1100, class: sreg_128 }
+  - { id: 1101, class: sreg_128 }
+  - { id: 1102, class: sreg_128 }
+  - { id: 1103, class: sreg_128 }
+  - { id: 1104, class: sreg_128 }
+  - { id: 1105, class: sreg_128 }
+  - { id: 1106, class: sreg_128 }
+  - { id: 1107, class: sreg_128 }
+  - { id: 1108, class: sreg_128 }
+  - { id: 1109, class: sreg_128 }
+  - { id: 1110, class: sreg_128 }
+  - { id: 1111, class: sreg_128 }
+  - { id: 1112, class: sreg_128 }
+  - { id: 1113, class: sreg_128 }
+  - { id: 1114, class: sreg_128 }
+  - { id: 1115, class: sreg_128 }
+  - { id: 1116, class: sreg_128 }
+  - { id: 1117, class: sreg_128 }
+  - { id: 1118, class: sreg_128 }
+  - { id: 1119, class: sreg_128 }
+  - { id: 1120, class: sreg_128 }
+  - { id: 1121, class: sreg_128 }
+body:             |
+  bb.0:
+    successors: %bb.1
+    liveins: $sgpr96_sgpr97, $sgpr98_sgpr99, $sgpr100_sgpr101, $sgpr102_sgpr103
+    %0:sreg_64 = COPY $sgpr102_sgpr103
+    %1100 = COPY $sgpr100_sgpr101_sgpr102_sgpr103
+    %1101 = COPY %1100
+    %1102 = COPY %1100
+    %1103 = COPY %1100
+    %1104 = COPY %1100
+    %1105 = COPY %1100
+    %1106 = COPY %1100
+    %1107 = COPY %1100
+    %1108 = COPY %1100
+    %1109 = COPY %1100
+    %1110 = COPY %1100
+    %1111 = COPY %1100
+    %1112 = COPY %1100
+    %1113 = COPY %1100
+    %1114 = COPY %1100
+    %1115 = COPY %1100
+    %1116 = COPY %1100
+    %1117 = COPY %1100
+    %1118 = COPY %1100
+    %1119 = COPY %1100
+    %1120 = COPY %1100
+    %1121 = COPY %1100
+    S_BRANCH %bb.1
+
+  bb.1:
+    liveins: $sgpr96_sgpr97, $sgpr98_sgpr99, $sgpr102_sgpr103
+    %0 = S_OR_SAVEEXEC_B64 $sgpr96_sgpr97, implicit-def $exec, implicit-def $scc, implicit $exec
+    $exec = S_XOR_B64_term $exec, %0, implicit-def $scc
+    SI_MASK_BRANCH %bb.100, implicit $exec
+    S_BRANCH %bb.2
+
+  bb.2:
+    liveins: $sgpr98_sgpr99, $sgpr102_sgpr103
+    %0:sreg_64 = S_OR_SAVEEXEC_B64 $sgpr98_sgpr99, implicit-def $exec, implicit-def $scc, implicit $exec
+    $exec = S_XOR_B64_term $exec, %0, implicit-def $scc
+    SI_MASK_BRANCH %bb.100, implicit $exec
+    S_BRANCH %bb.200
+
+  bb.100:
+    liveins: $sgpr102_sgpr103
+    %0:sreg_64 = S_OR_SAVEEXEC_B64 $sgpr102_sgpr103, implicit-def $exec, implicit-def $scc, implicit $exec
+    $exec = S_XOR_B64_term $exec, %0, implicit-def $scc
+    S_BRANCH %bb.200
+
+  bb.200:
+    S_CMP_EQ_U64 %1100.sub0_sub1, %1101.sub2_sub3, implicit-def $scc
+    S_CMP_EQ_U64 %1102.sub0_sub1, %1103.sub2_sub3, implicit-def $scc
+    S_CMP_EQ_U64 %1104.sub0_sub1, %1105.sub2_sub3, implicit-def $scc
+    S_CMP_EQ_U64 %1106.sub0_sub1, %1107.sub2_sub3, implicit-def $scc
+    S_CMP_EQ_U64 %1108.sub0_sub1, %1109.sub2_sub3, implicit-def $scc
+    S_CMP_EQ_U64 %1110.sub0_sub1, %1111.sub2_sub3, implicit-def $scc
+    S_CMP_EQ_U64 %1112.sub0_sub1, %1113.sub2_sub3, implicit-def $scc
+    S_CMP_EQ_U64 %1114.sub0_sub1, %1115.sub2_sub3, implicit-def $scc
+    S_CMP_EQ_U64 %1116.sub0_sub1, %1117.sub2_sub3, implicit-def $scc
+    S_CMP_EQ_U64 %1118.sub0_sub1, %1119.sub2_sub3, implicit-def $scc
+    S_CMP_EQ_U64 %1120.sub0_sub1, %1121.sub2_sub3, implicit-def $scc
+
+    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+    S_SETPC_B64_return %0, implicit $vgpr0
+
+...




More information about the llvm-commits mailing list