[llvm] 7748055 - [RegAllocGreedy] New hook regClassPriorityTrumpsGlobalness
Jay Foad via llvm-commits
llvm-commits at lists.llvm.org
Tue May 17 04:42:49 PDT 2022
Author: Jay Foad
Date: 2022-05-17T12:35:21+01:00
New Revision: 77480556c41fbca36b918323c69cb77f8e02b56c
URL: https://github.com/llvm/llvm-project/commit/77480556c41fbca36b918323c69cb77f8e02b56c
DIFF: https://github.com/llvm/llvm-project/commit/77480556c41fbca36b918323c69cb77f8e02b56c.diff
LOG: [RegAllocGreedy] New hook regClassPriorityTrumpsGlobalness
Add a new TargetRegisterInfo hook to allow targets to tweak the
priority of live ranges, so that AllocationPriority of the register
class will be treated as more important than whether the range is local
to a basic block or global. This is determined per-MachineFunction.
Differential Revision: https://reviews.llvm.org/D125102
Added:
llvm/test/CodeGen/AMDGPU/greedy-liverange-priority.mir
Modified:
llvm/include/llvm/CodeGen/TargetRegisterInfo.h
llvm/include/llvm/Target/Target.td
llvm/lib/CodeGen/RegAllocGreedy.cpp
llvm/lib/CodeGen/RegAllocGreedy.h
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
index 1e316f026266..04369a5bfe0d 100644
--- a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
@@ -55,6 +55,8 @@ class TargetRegisterClass {
const LaneBitmask LaneMask;
/// Classes with a higher priority value are assigned first by register
/// allocators using a greedy heuristic. The value is in the range [0,63].
+ /// Values >= 32 should be used with care since they may overlap with other
+ /// fields in the allocator's priority heuristics.
const uint8_t AllocationPriority;
/// Configurable target specific flags.
const uint8_t TSFlags;
@@ -1076,6 +1078,14 @@ class TargetRegisterInfo : public MCRegisterInfo {
return false;
}
+ /// When prioritizing live ranges in register allocation, if this hook returns
+ /// true then the AllocationPriority of the register class will be treated as
+ /// more important than whether the range is local to a basic block or global.
+ virtual bool
+ regClassPriorityTrumpsGlobalness(const MachineFunction &MF) const {
+ return false; // Default: globalness outranks AllocationPriority.
+ }
+
//===--------------------------------------------------------------------===//
/// Debug information queries.
diff --git a/llvm/include/llvm/Target/Target.td b/llvm/include/llvm/Target/Target.td
index 78681067563a..c5b2462dc868 100644
--- a/llvm/include/llvm/Target/Target.td
+++ b/llvm/include/llvm/Target/Target.td
@@ -279,6 +279,8 @@ class RegisterClass<string namespace, list<ValueType> regTypes, int alignment,
// heuristic. Classes with higher priority values are assigned first. This is
// useful as it is sometimes beneficial to assign registers to highly
// constrained classes first. The value has to be in the range [0,63].
+ // Values >= 32 should be used with care since they may overlap with other
+ // fields in the allocator's priority heuristics.
int AllocationPriority = 0;
// Generate register pressure set for this register class and any class
diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp
index 31721da2c830..0bcd2c567a50 100644
--- a/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -128,6 +128,13 @@ static cl::opt<unsigned long> GrowRegionComplexityBudget(
"limit its budget and bail out once we reach the limit."),
cl::init(10000), cl::Hidden);
+static cl::opt<bool> GreedyRegClassPriorityTrumpsGlobalness(
+ "greedy-regclass-priority-trumps-globalness",
+ cl::desc("Change the greedy register allocator's live range priority "
+ "calculation to make the AllocationPriority of the register class "
+ "more important than whether the range is global"),
+ cl::Hidden); // An explicit occurrence overrides the target hook.
+
static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator",
createGreedyRegisterAllocator);
@@ -305,6 +312,7 @@ void RAGreedy::enqueue(PQueue &CurQueue, const LiveInterval *LI) {
const TargetRegisterClass &RC = *MRI->getRegClass(Reg);
bool ForceGlobal = !ReverseLocal &&
(Size / SlotIndex::InstrDist) > (2 * RCI.getNumAllocatableRegs(&RC));
+ unsigned GlobalBit = 0;
if (Stage == RS_Assign && !ForceGlobal && !LI->empty() &&
LIS->intervalIsInOneMBB(*LI)) {
@@ -323,9 +331,13 @@ void RAGreedy::enqueue(PQueue &CurQueue, const LiveInterval *LI) {
// Allocate global and split ranges in long->short order. Long ranges that
// don't fit should be spilled (or split) ASAP so they don't create
// interference. Mark a bit to prioritize global above local ranges.
- Prio = (1u << 29) + Size;
+ Prio = Size;
+ GlobalBit = 1;
}
- Prio |= RC.AllocationPriority << 24;
+ if (RegClassPriorityTrumpsGlobalness)
+ Prio |= RC.AllocationPriority << 25 | GlobalBit << 24;
+ else
+ Prio |= GlobalBit << 29 | RC.AllocationPriority << 24;
// Mark a higher bit to prioritize global and local above RS_Split.
Prio |= (1u << 31);
@@ -2692,6 +2704,10 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
initializeCSRCost();
RegCosts = TRI->getRegisterCosts(*MF);
+ RegClassPriorityTrumpsGlobalness =
+ GreedyRegClassPriorityTrumpsGlobalness.getNumOccurrences()
+ ? GreedyRegClassPriorityTrumpsGlobalness
+ : TRI->regClassPriorityTrumpsGlobalness(*MF);
ExtraInfo.emplace();
EvictAdvisor =
diff --git a/llvm/lib/CodeGen/RegAllocGreedy.h b/llvm/lib/CodeGen/RegAllocGreedy.h
index ef4bb90a7b54..ad810c25ec6e 100644
--- a/llvm/lib/CodeGen/RegAllocGreedy.h
+++ b/llvm/lib/CodeGen/RegAllocGreedy.h
@@ -322,6 +322,10 @@ class LLVM_LIBRARY_VISIBILITY RAGreedy : public MachineFunctionPass,
/// Function
ArrayRef<uint8_t> RegCosts;
+ /// Flags for the live range priority calculation, determined once per
+ /// machine function.
+ bool RegClassPriorityTrumpsGlobalness;
+
public:
RAGreedy(const RegClassFilterFunc F = allocateAllRegClasses);
diff --git a/llvm/test/CodeGen/AMDGPU/greedy-liverange-priority.mir b/llvm/test/CodeGen/AMDGPU/greedy-liverange-priority.mir
new file mode 100644
index 000000000000..01cda7118716
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/greedy-liverange-priority.mir
@@ -0,0 +1,48 @@
+# RUN: llc -march=amdgcn -mcpu=gfx1030 -greedy-regclass-priority-trumps-globalness=0 -start-before greedy -o - %s | FileCheck %s -check-prefix=OLD
+# RUN: llc -march=amdgcn -mcpu=gfx1030 -greedy-regclass-priority-trumps-globalness=1 -start-before greedy -o - %s | FileCheck %s -check-prefix=NEW
+
+# At the time of writing -greedy-regclass-priority-trumps-globalness makes a
+# significant improvement in the total number of vgprs needed to compile this
+# test, from 11 down to 7.
+
+# OLD: NumVgprs: 11{{$}}
+# NEW: NumVgprs: 7{{$}}
+
+---
+name: _amdgpu_cs_main
+tracksRegLiveness: true
+body: |
+ bb.0:
+ successors: %bb.1, %bb.2
+ liveins: $vgpr0, $vgpr6
+
+ %6:vgpr_32 = COPY $vgpr6
+ undef %30.sub0:vreg_128 = COPY $vgpr0
+ undef %27.sub0:vreg_128 = V_MED3_F32_e64 0, 0, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ undef %16.sub0:sgpr_256 = S_MOV_B32 0
+ undef %26.sub1:vreg_64 = V_LSHRREV_B32_e32 1, %6, implicit $exec
+ %27.sub1:vreg_128 = COPY %27.sub0
+ %27.sub2:vreg_128 = COPY %27.sub0
+ %27.sub3:vreg_128 = COPY %27.sub0
+ %26.sub0:vreg_64 = V_MOV_B32_e32 1, implicit $exec
+ %16.sub1:sgpr_256 = COPY %16.sub0
+ %16.sub2:sgpr_256 = COPY %16.sub0
+ %16.sub3:sgpr_256 = COPY %16.sub0
+ %16.sub4:sgpr_256 = COPY %16.sub0
+ %16.sub5:sgpr_256 = COPY %16.sub0
+ %16.sub6:sgpr_256 = COPY %16.sub0
+ %16.sub7:sgpr_256 = COPY %16.sub0
+ IMAGE_STORE_V4_V2_gfx10 %27, %26, %16, 0, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into custom "ImageResource")
+ S_CBRANCH_SCC1 %bb.2, implicit undef $scc
+ S_BRANCH %bb.1
+
+ bb.1:
+ %30.sub1:vreg_128 = V_MOV_B32_e32 0, implicit $exec
+ %30.sub2:vreg_128 = COPY %30.sub1
+ %30.sub3:vreg_128 = COPY %30.sub1
+ %26.sub1:vreg_64 = COPY %30.sub1
+ IMAGE_STORE_V4_V2_gfx10 %30, %26, %16, 0, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into custom "ImageResource")
+
+ bb.2:
+ S_ENDPGM 0
+...
More information about the llvm-commits mailing list