[llvm] r343004 - [RegAllocGreedy] avoid using physreg candidates that cannot be correctly spilled
Daniil Fukalov via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 25 11:37:38 PDT 2018
Author: dfukalov
Date: Tue Sep 25 11:37:38 2018
New Revision: 343004
URL: http://llvm.org/viewvc/llvm-project?rev=343004&view=rev
Log:
[RegAllocGreedy] avoid using physreg candidates that cannot be correctly spilled
For the AMDGPU target, if an MBB contains an exec mask restore preamble, SplitEditor may reach a state in which it cannot insert a spill instruction.
E.g., for MIR such as
bb.100:
%1 = S_OR_SAVEEXEC_B64 %2, implicit-def $exec, implicit-def $scc, implicit $exec
if the register allocator tries to assign another virtreg to the physreg already assigned to virtreg %1, it has to insert a spill instruction before the S_OR_SAVEEXEC_B64 instruction.
That is not possible, since it would generate incorrect code with respect to the exec mask.
The change makes the register allocator ignore such physreg candidates.
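To make the failure mode concrete, here is an illustrative sketch (not part of the patch) based on bb.100 of the added test; the comments are annotations only, and assume AMDGPU treats the exec-restore instructions as the block prologue:

  bb.100:
    liveins: $sgpr102_sgpr103
    ; a spill inserted at the block start would land here, i.e. before the
    ; exec mask is restored, which can produce incorrect code w.r.t. $exec
    %0:sreg_64 = S_OR_SAVEEXEC_B64 $sgpr102_sgpr103, implicit-def $exec, implicit-def $scc, implicit $exec
    ; the first split point reported by SplitAnalysis::getFirstSplitPoint is
    ; only after this exec mask restore preamble
    $exec = S_XOR_B64_term $exec, %0, implicit-def $scc
    S_BRANCH %bb.200

With this patch, addSplitConstraints and addThroughConstraints detect this case by comparing the block's first relevant instruction against that first split point; growRegion then returns false, so the physreg candidate is rejected rather than split.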
Reviewed By: rampitec
Differential Revision: https://reviews.llvm.org/D52052
Added:
llvm/trunk/test/CodeGen/AMDGPU/spill-before-exec.mir
Modified:
llvm/trunk/lib/CodeGen/RegAllocGreedy.cpp
llvm/trunk/lib/CodeGen/SplitKit.h
Modified: llvm/trunk/lib/CodeGen/RegAllocGreedy.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegAllocGreedy.cpp?rev=343004&r1=343003&r2=343004&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/RegAllocGreedy.cpp (original)
+++ llvm/trunk/lib/CodeGen/RegAllocGreedy.cpp Tue Sep 25 11:37:38 2018
@@ -449,8 +449,8 @@ private:
BlockFrequency calcSpillCost();
bool addSplitConstraints(InterferenceCache::Cursor, BlockFrequency&);
- void addThroughConstraints(InterferenceCache::Cursor, ArrayRef<unsigned>);
- void growRegion(GlobalSplitCandidate &Cand);
+ bool addThroughConstraints(InterferenceCache::Cursor, ArrayRef<unsigned>);
+ bool growRegion(GlobalSplitCandidate &Cand);
bool splitCanCauseEvictionChain(unsigned Evictee, GlobalSplitCandidate &Cand,
unsigned BBNumber,
const AllocationOrder &Order);
@@ -1203,6 +1203,13 @@ bool RAGreedy::addSplitConstraints(Inter
} else if (Intf.first() < BI.LastInstr) {
++Ins;
}
+
+ // Abort if the spill cannot be inserted at the MBB's start
+ if (((BC.Entry == SpillPlacement::MustSpill) ||
+ (BC.Entry == SpillPlacement::PrefSpill)) &&
+ SlotIndex::isEarlierInstr(BI.FirstInstr,
+ SA->getFirstSplitPoint(BC.Number)))
+ return false;
}
// Interference for the live-out value.
@@ -1232,7 +1239,7 @@ bool RAGreedy::addSplitConstraints(Inter
/// addThroughConstraints - Add constraints and links to SpillPlacer from the
/// live-through blocks in Blocks.
-void RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf,
+bool RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf,
ArrayRef<unsigned> Blocks) {
const unsigned GroupSize = 8;
SpillPlacement::BlockConstraint BCS[GroupSize];
@@ -1256,6 +1263,12 @@ void RAGreedy::addThroughConstraints(Int
assert(B < GroupSize && "Array overflow");
BCS[B].Number = Number;
+ // Abort if the spill cannot be inserted at the MBB's start
+ MachineBasicBlock *MBB = MF->getBlockNumbered(Number);
+ if (!MBB->empty() &&
+ SlotIndex::isEarlierInstr(LIS->getInstructionIndex(MBB->instr_front()),
+ SA->getFirstSplitPoint(Number)))
+ return false;
// Interference for the live-in value.
if (Intf.first() <= Indexes->getMBBStartIdx(Number))
BCS[B].Entry = SpillPlacement::MustSpill;
@@ -1276,9 +1289,10 @@ void RAGreedy::addThroughConstraints(Int
SpillPlacer->addConstraints(makeArrayRef(BCS, B));
SpillPlacer->addLinks(makeArrayRef(TBS, T));
+ return true;
}
-void RAGreedy::growRegion(GlobalSplitCandidate &Cand) {
+bool RAGreedy::growRegion(GlobalSplitCandidate &Cand) {
// Keep track of through blocks that have not been added to SpillPlacer.
BitVector Todo = SA->getThroughBlocks();
SmallVectorImpl<unsigned> &ActiveBlocks = Cand.ActiveBlocks;
@@ -1314,9 +1328,10 @@ void RAGreedy::growRegion(GlobalSplitCan
// Compute through constraints from the interference, or assume that all
// through blocks prefer spilling when forming compact regions.
auto NewBlocks = makeArrayRef(ActiveBlocks).slice(AddedTo);
- if (Cand.PhysReg)
- addThroughConstraints(Cand.Intf, NewBlocks);
- else
+ if (Cand.PhysReg) {
+ if (!addThroughConstraints(Cand.Intf, NewBlocks))
+ return false;
+ } else
// Provide a strong negative bias on through blocks to prevent unwanted
// liveness on loop backedges.
SpillPlacer->addPrefSpill(NewBlocks, /* Strong= */ true);
@@ -1326,6 +1341,7 @@ void RAGreedy::growRegion(GlobalSplitCan
SpillPlacer->iterate();
}
LLVM_DEBUG(dbgs() << ", v=" << Visited);
+ return true;
}
/// calcCompactRegion - Compute the set of edge bundles that should be live
@@ -1356,7 +1372,11 @@ bool RAGreedy::calcCompactRegion(GlobalS
return false;
}
- growRegion(Cand);
+ if (!growRegion(Cand)) {
+ LLVM_DEBUG(dbgs() << ", cannot spill all interferences.\n");
+ return false;
+ }
+
SpillPlacer->finish();
if (!Cand.LiveBundles.any()) {
@@ -1886,7 +1906,10 @@ unsigned RAGreedy::calculateRegionSplitC
});
continue;
}
- growRegion(Cand);
+ if (!growRegion(Cand)) {
+ LLVM_DEBUG(dbgs() << ", cannot spill all interferences.\n");
+ continue;
+ }
SpillPlacer->finish();
Modified: llvm/trunk/lib/CodeGen/SplitKit.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SplitKit.h?rev=343004&r1=343003&r2=343004&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SplitKit.h (original)
+++ llvm/trunk/lib/CodeGen/SplitKit.h Tue Sep 25 11:37:38 2018
@@ -25,6 +25,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/SlotIndexes.h"
@@ -76,6 +77,18 @@ public:
/// Returns the last insert point as an iterator for \p CurLI in \p MBB.
MachineBasicBlock::iterator getLastInsertPointIter(const LiveInterval &CurLI,
MachineBasicBlock &MBB);
+
+ /// Return the base index of the first insert point in \p MBB.
+ SlotIndex getFirstInsertPoint(MachineBasicBlock &MBB) {
+ SlotIndex Res = LIS.getMBBStartIdx(&MBB);
+ if (!MBB.empty()) {
+ MachineBasicBlock::iterator MII = MBB.SkipPHIsLabelsAndDebug(MBB.begin());
+ if (MII != MBB.end())
+ Res = LIS.getInstructionIndex(*MII);
+ }
+ return Res;
+ }
+
};
/// SplitAnalysis - Analyze a LiveInterval, looking for live range splitting
@@ -225,6 +238,10 @@ public:
MachineBasicBlock::iterator getLastSplitPointIter(MachineBasicBlock *BB) {
return IPA.getLastInsertPointIter(*CurLI, *BB);
}
+
+ SlotIndex getFirstSplitPoint(unsigned Num) {
+ return IPA.getFirstInsertPoint(*MF.getBlockNumbered(Num));
+ }
};
/// SplitEditor - Edit machine code and LiveIntervals for live range
Added: llvm/trunk/test/CodeGen/AMDGPU/spill-before-exec.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/spill-before-exec.mir?rev=343004&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/spill-before-exec.mir (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/spill-before-exec.mir Tue Sep 25 11:37:38 2018
@@ -0,0 +1,100 @@
+# REQUIRES: asserts
+# RUN: llc -mtriple=amdgcn--- -verify-machineinstrs -debug-only=regalloc -run-pass=greedy -o /dev/null %s 2>&1 | FileCheck %s
+
+---
+# Check that a physreg candidate is not used when it cannot be spilled in a block,
+# e.g. before an exec mask restore preamble.
+# CHECK: , cannot spill all interferences.
+
+name: foo
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: sreg_64 }
+ - { id: 1100, class: sreg_128 }
+ - { id: 1101, class: sreg_128 }
+ - { id: 1102, class: sreg_128 }
+ - { id: 1103, class: sreg_128 }
+ - { id: 1104, class: sreg_128 }
+ - { id: 1105, class: sreg_128 }
+ - { id: 1106, class: sreg_128 }
+ - { id: 1107, class: sreg_128 }
+ - { id: 1108, class: sreg_128 }
+ - { id: 1109, class: sreg_128 }
+ - { id: 1110, class: sreg_128 }
+ - { id: 1111, class: sreg_128 }
+ - { id: 1112, class: sreg_128 }
+ - { id: 1113, class: sreg_128 }
+ - { id: 1114, class: sreg_128 }
+ - { id: 1115, class: sreg_128 }
+ - { id: 1116, class: sreg_128 }
+ - { id: 1117, class: sreg_128 }
+ - { id: 1118, class: sreg_128 }
+ - { id: 1119, class: sreg_128 }
+ - { id: 1120, class: sreg_128 }
+ - { id: 1121, class: sreg_128 }
+body: |
+ bb.0:
+ successors: %bb.1
+ liveins: $sgpr96_sgpr97, $sgpr98_sgpr99, $sgpr100_sgpr101, $sgpr102_sgpr103
+ %0:sreg_64 = COPY $sgpr102_sgpr103
+ %1100 = COPY $sgpr100_sgpr101_sgpr102_sgpr103
+ %1101 = COPY %1100
+ %1102 = COPY %1100
+ %1103 = COPY %1100
+ %1104 = COPY %1100
+ %1105 = COPY %1100
+ %1106 = COPY %1100
+ %1107 = COPY %1100
+ %1108 = COPY %1100
+ %1109 = COPY %1100
+ %1110 = COPY %1100
+ %1111 = COPY %1100
+ %1112 = COPY %1100
+ %1113 = COPY %1100
+ %1114 = COPY %1100
+ %1115 = COPY %1100
+ %1116 = COPY %1100
+ %1117 = COPY %1100
+ %1118 = COPY %1100
+ %1119 = COPY %1100
+ %1120 = COPY %1100
+ %1121 = COPY %1100
+ S_BRANCH %bb.1
+
+ bb.1:
+ liveins: $sgpr96_sgpr97, $sgpr98_sgpr99, $sgpr102_sgpr103
+ %0 = S_OR_SAVEEXEC_B64 $sgpr96_sgpr97, implicit-def $exec, implicit-def $scc, implicit $exec
+ $exec = S_XOR_B64_term $exec, %0, implicit-def $scc
+ SI_MASK_BRANCH %bb.100, implicit $exec
+ S_BRANCH %bb.2
+
+ bb.2:
+ liveins: $sgpr98_sgpr99, $sgpr102_sgpr103
+ %0:sreg_64 = S_OR_SAVEEXEC_B64 $sgpr98_sgpr99, implicit-def $exec, implicit-def $scc, implicit $exec
+ $exec = S_XOR_B64_term $exec, %0, implicit-def $scc
+ SI_MASK_BRANCH %bb.100, implicit $exec
+ S_BRANCH %bb.200
+
+ bb.100:
+ liveins: $sgpr102_sgpr103
+ %0:sreg_64 = S_OR_SAVEEXEC_B64 $sgpr102_sgpr103, implicit-def $exec, implicit-def $scc, implicit $exec
+ $exec = S_XOR_B64_term $exec, %0, implicit-def $scc
+ S_BRANCH %bb.200
+
+ bb.200:
+ S_CMP_EQ_U64 %1100.sub0_sub1, %1101.sub2_sub3, implicit-def $scc
+ S_CMP_EQ_U64 %1102.sub0_sub1, %1103.sub2_sub3, implicit-def $scc
+ S_CMP_EQ_U64 %1104.sub0_sub1, %1105.sub2_sub3, implicit-def $scc
+ S_CMP_EQ_U64 %1106.sub0_sub1, %1107.sub2_sub3, implicit-def $scc
+ S_CMP_EQ_U64 %1108.sub0_sub1, %1109.sub2_sub3, implicit-def $scc
+ S_CMP_EQ_U64 %1110.sub0_sub1, %1111.sub2_sub3, implicit-def $scc
+ S_CMP_EQ_U64 %1112.sub0_sub1, %1113.sub2_sub3, implicit-def $scc
+ S_CMP_EQ_U64 %1114.sub0_sub1, %1115.sub2_sub3, implicit-def $scc
+ S_CMP_EQ_U64 %1116.sub0_sub1, %1117.sub2_sub3, implicit-def $scc
+ S_CMP_EQ_U64 %1118.sub0_sub1, %1119.sub2_sub3, implicit-def $scc
+ S_CMP_EQ_U64 %1120.sub0_sub1, %1121.sub2_sub3, implicit-def $scc
+
+ $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ S_SETPC_B64_return %0, implicit $vgpr0
+
+...