[llvm] e6ada71 - [regalloc][basic] Change spill weight for optsize funcs (#112960)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 21 11:10:54 PDT 2024
Author: Ellis Hoag
Date: 2024-10-21T11:10:50-07:00
New Revision: e6ada7162e25ab28f6e588fba23f0c11dd1238b5
URL: https://github.com/llvm/llvm-project/commit/e6ada7162e25ab28f6e588fba23f0c11dd1238b5
DIFF: https://github.com/llvm/llvm-project/commit/e6ada7162e25ab28f6e588fba23f0c11dd1238b5.diff
LOG: [regalloc][basic] Change spill weight for optsize funcs (#112960)
Change the spill weight calculations for `optsize` functions to remove
the block frequency multiplier. For those functions, we do not want to
consider the runtime cost of spilling, only the codesize cost.
I built a large app with the basic and greedy (default) register
allocator enabled.
| Regalloc Type | Uncompressed Size Delta | Compressed Size Delta |
| - | - | - |
| Basic | -303.8 KiB (-0.23%) | -232.0 KiB (-0.39%) |
| Greedy | 159.1 KiB (0.12%) | 130.1 KiB (0.22%) |
Since I only saw a size win with the basic register allocator, I decided
to only change the behavior for that type.
Added:
llvm/test/CodeGen/AArch64/regalloc-spill-weight-basic.ll
Modified:
llvm/include/llvm/CodeGen/CalcSpillWeights.h
llvm/include/llvm/CodeGen/LiveIntervals.h
llvm/lib/CodeGen/CalcSpillWeights.cpp
llvm/lib/CodeGen/LiveIntervals.cpp
llvm/lib/CodeGen/RegAllocBasic.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/CalcSpillWeights.h b/llvm/include/llvm/CodeGen/CalcSpillWeights.h
index 41b7f10cfc38ac..acb8b762efc643 100644
--- a/llvm/include/llvm/CodeGen/CalcSpillWeights.h
+++ b/llvm/include/llvm/CodeGen/CalcSpillWeights.h
@@ -18,6 +18,7 @@ class LiveIntervals;
class MachineBlockFrequencyInfo;
class MachineFunction;
class MachineLoopInfo;
+class ProfileSummaryInfo;
class VirtRegMap;
/// Normalize the spill weight of a live interval
@@ -47,6 +48,7 @@ class VirtRegMap;
LiveIntervals &LIS;
const VirtRegMap &VRM;
const MachineLoopInfo &Loops;
+ ProfileSummaryInfo *PSI;
const MachineBlockFrequencyInfo &MBFI;
/// Returns true if Reg of live interval LI is used in instruction with many
@@ -56,8 +58,9 @@ class VirtRegMap;
public:
VirtRegAuxInfo(MachineFunction &MF, LiveIntervals &LIS,
const VirtRegMap &VRM, const MachineLoopInfo &Loops,
- const MachineBlockFrequencyInfo &MBFI)
- : MF(MF), LIS(LIS), VRM(VRM), Loops(Loops), MBFI(MBFI) {}
+ const MachineBlockFrequencyInfo &MBFI,
+ ProfileSummaryInfo *PSI = nullptr)
+ : MF(MF), LIS(LIS), VRM(VRM), Loops(Loops), PSI(PSI), MBFI(MBFI) {}
virtual ~VirtRegAuxInfo() = default;
diff --git a/llvm/include/llvm/CodeGen/LiveIntervals.h b/llvm/include/llvm/CodeGen/LiveIntervals.h
index 4c45a9676d6bd1..161bb247a0e968 100644
--- a/llvm/include/llvm/CodeGen/LiveIntervals.h
+++ b/llvm/include/llvm/CodeGen/LiveIntervals.h
@@ -47,6 +47,7 @@ class MachineDominatorTree;
class MachineFunction;
class MachineInstr;
class MachineRegisterInfo;
+class ProfileSummaryInfo;
class raw_ostream;
class TargetInstrInfo;
class VirtRegMap;
@@ -113,14 +114,18 @@ class LiveIntervals {
~LiveIntervals();
/// Calculate the spill weight to assign to a single instruction.
+ /// If \p PSI is provided the calculation is altered for optsize functions.
static float getSpillWeight(bool isDef, bool isUse,
const MachineBlockFrequencyInfo *MBFI,
- const MachineInstr &MI);
+ const MachineInstr &MI,
+ ProfileSummaryInfo *PSI = nullptr);
/// Calculate the spill weight to assign to a single instruction.
+ /// If \p PSI is provided the calculation is altered for optsize functions.
static float getSpillWeight(bool isDef, bool isUse,
const MachineBlockFrequencyInfo *MBFI,
- const MachineBasicBlock *MBB);
+ const MachineBasicBlock *MBB,
+ ProfileSummaryInfo *PSI = nullptr);
LiveInterval &getInterval(Register Reg) {
if (hasInterval(Reg))
diff --git a/llvm/lib/CodeGen/CalcSpillWeights.cpp b/llvm/lib/CodeGen/CalcSpillWeights.cpp
index 88ed2291313c95..f361c956092e88 100644
--- a/llvm/lib/CodeGen/CalcSpillWeights.cpp
+++ b/llvm/lib/CodeGen/CalcSpillWeights.cpp
@@ -199,8 +199,10 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start,
// localLI = COPY other
// ...
// other = COPY localLI
- TotalWeight += LiveIntervals::getSpillWeight(true, false, &MBFI, LocalMBB);
- TotalWeight += LiveIntervals::getSpillWeight(false, true, &MBFI, LocalMBB);
+ TotalWeight +=
+ LiveIntervals::getSpillWeight(true, false, &MBFI, LocalMBB, PSI);
+ TotalWeight +=
+ LiveIntervals::getSpillWeight(false, true, &MBFI, LocalMBB, PSI);
NumInstr += 2;
}
@@ -272,7 +274,7 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start,
// Calculate instr weight.
bool Reads, Writes;
std::tie(Reads, Writes) = MI->readsWritesVirtualRegister(LI.reg());
- Weight = LiveIntervals::getSpillWeight(Writes, Reads, &MBFI, *MI);
+ Weight = LiveIntervals::getSpillWeight(Writes, Reads, &MBFI, *MI, PSI);
// Give extra weight to what looks like a loop induction variable update.
if (Writes && IsExiting && LIS.isLiveOutOfMBB(LI, MBB))
diff --git a/llvm/lib/CodeGen/LiveIntervals.cpp b/llvm/lib/CodeGen/LiveIntervals.cpp
index 7ddaaaa915ef17..21a316cf99a217 100644
--- a/llvm/lib/CodeGen/LiveIntervals.cpp
+++ b/llvm/lib/CodeGen/LiveIntervals.cpp
@@ -30,6 +30,7 @@
#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineSizeOpts.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/StackMaps.h"
@@ -37,6 +38,7 @@
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/Config/llvm-config.h"
+#include "llvm/IR/ProfileSummary.h"
#include "llvm/IR/Statepoint.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/MC/MCRegisterInfo.h"
@@ -875,14 +877,23 @@ LiveIntervals::hasPHIKill(const LiveInterval &LI, const VNInfo *VNI) const {
float LiveIntervals::getSpillWeight(bool isDef, bool isUse,
const MachineBlockFrequencyInfo *MBFI,
- const MachineInstr &MI) {
- return getSpillWeight(isDef, isUse, MBFI, MI.getParent());
+ const MachineInstr &MI,
+ ProfileSummaryInfo *PSI) {
+ return getSpillWeight(isDef, isUse, MBFI, MI.getParent(), PSI);
}
float LiveIntervals::getSpillWeight(bool isDef, bool isUse,
const MachineBlockFrequencyInfo *MBFI,
- const MachineBasicBlock *MBB) {
- return (isDef + isUse) * MBFI->getBlockFreqRelativeToEntryBlock(MBB);
+ const MachineBasicBlock *MBB,
+ ProfileSummaryInfo *PSI) {
+ float Weight = isDef + isUse;
+ const auto *MF = MBB->getParent();
+ // When optimizing for size we only consider the codesize impact of spilling
+ // the register, not the runtime impact.
+ if (PSI && (MF->getFunction().hasOptSize() ||
+ llvm::shouldOptimizeForSize(MF, PSI, MBFI)))
+ return Weight;
+ return Weight * MBFI->getBlockFreqRelativeToEntryBlock(MBB);
}
LiveRange::Segment
diff --git a/llvm/lib/CodeGen/RegAllocBasic.cpp b/llvm/lib/CodeGen/RegAllocBasic.cpp
index caf9c32a5a3498..046784c386e301 100644
--- a/llvm/lib/CodeGen/RegAllocBasic.cpp
+++ b/llvm/lib/CodeGen/RegAllocBasic.cpp
@@ -14,6 +14,7 @@
#include "AllocationOrder.h"
#include "RegAllocBase.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/LiveDebugVariables.h"
#include "llvm/CodeGen/LiveIntervals.h"
@@ -140,6 +141,7 @@ INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_END(RABasic, "regallocbasic", "Basic Register Allocator", false,
false)
@@ -182,6 +184,7 @@ void RABasic::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<LiveDebugVariables>();
AU.addRequired<LiveStacks>();
AU.addPreserved<LiveStacks>();
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
AU.addPreserved<MachineBlockFrequencyInfoWrapperPass>();
AU.addRequiredID(MachineDominatorsID);
@@ -312,7 +315,8 @@ bool RABasic::runOnMachineFunction(MachineFunction &mf) {
getAnalysis<LiveRegMatrix>());
VirtRegAuxInfo VRAI(
*MF, *LIS, *VRM, getAnalysis<MachineLoopInfoWrapperPass>().getLI(),
- getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI());
+ getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI(),
+ &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI());
VRAI.calculateSpillWeightsAndHints();
SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM, VRAI));
diff --git a/llvm/test/CodeGen/AArch64/regalloc-spill-weight-basic.ll b/llvm/test/CodeGen/AArch64/regalloc-spill-weight-basic.ll
new file mode 100644
index 00000000000000..5c3bd984087ec1
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/regalloc-spill-weight-basic.ll
@@ -0,0 +1,168 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+
+; RUN: llc < %s -mtriple=aarch64 -regalloc=basic | FileCheck %s
+
+; Test that the register allocator behaves differently with minsize functions.
+
+declare void @foo(i32, ptr)
+
+define void @optsize(i32 %arg, i32 %arg1, ptr %arg2, ptr %arg3, ptr %arg4, i32 %arg5, i1 %arg6) minsize {
+; CHECK-LABEL: optsize:
+; CHECK: // %bb.0: // %bb
+; CHECK-NEXT: stp x30, x23, [sp, #-48]! // 16-byte Folded Spill
+; CHECK-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: .cfi_offset w19, -8
+; CHECK-NEXT: .cfi_offset w20, -16
+; CHECK-NEXT: .cfi_offset w21, -24
+; CHECK-NEXT: .cfi_offset w22, -32
+; CHECK-NEXT: .cfi_offset w23, -40
+; CHECK-NEXT: .cfi_offset w30, -48
+; CHECK-NEXT: mov w23, w5
+; CHECK-NEXT: mov x22, x4
+; CHECK-NEXT: mov x21, x3
+; CHECK-NEXT: mov x20, x2
+; CHECK-NEXT: mov w19, w1
+; CHECK-NEXT: .LBB0_1: // %bb8
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: cbz w19, .LBB0_1
+; CHECK-NEXT: // %bb.2: // %bb8
+; CHECK-NEXT: // in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT: cmp w19, #39
+; CHECK-NEXT: b.eq .LBB0_6
+; CHECK-NEXT: // %bb.3: // %bb8
+; CHECK-NEXT: // in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT: cmp w19, #34
+; CHECK-NEXT: b.eq .LBB0_6
+; CHECK-NEXT: // %bb.4: // %bb8
+; CHECK-NEXT: // in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT: cmp w19, #10
+; CHECK-NEXT: b.ne .LBB0_1
+; CHECK-NEXT: // %bb.5: // %bb9
+; CHECK-NEXT: // in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT: str wzr, [x20]
+; CHECK-NEXT: b .LBB0_1
+; CHECK-NEXT: .LBB0_6: // %bb10
+; CHECK-NEXT: // in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT: mov w0, w23
+; CHECK-NEXT: mov x1, x21
+; CHECK-NEXT: str wzr, [x22]
+; CHECK-NEXT: bl foo
+; CHECK-NEXT: b .LBB0_1
+bb:
+ br label %bb7
+
+bb7: ; preds = %bb13, %bb
+ %phi = phi i32 [ 0, %bb ], [ %spec.select, %bb13 ]
+ br label %bb8
+
+bb8: ; preds = %bb10, %bb9, %bb8, %bb7
+ switch i32 %arg1, label %bb8 [
+ i32 10, label %bb9
+ i32 1, label %bb16
+ i32 0, label %bb13
+ i32 39, label %bb10
+ i32 34, label %bb10
+ ]
+
+bb9: ; preds = %bb8
+ store i32 0, ptr %arg2, align 4
+ br label %bb8
+
+bb10: ; preds = %bb8, %bb8
+ store i32 0, ptr %arg4, align 4
+ tail call void @foo(i32 %arg5, ptr %arg3)
+ br label %bb8
+
+bb13: ; preds = %bb8
+ %not.arg6 = xor i1 %arg6, true
+ %spec.select = zext i1 %not.arg6 to i32
+ br label %bb7
+
+bb16: ; preds = %bb8
+ unreachable
+}
+
+define void @optspeed(i32 %arg, i32 %arg1, ptr %arg2, ptr %arg3, ptr %arg4, i32 %arg5, i1 %arg6) {
+; CHECK-LABEL: optspeed:
+; CHECK: // %bb.0: // %bb
+; CHECK-NEXT: stp x30, x23, [sp, #-48]! // 16-byte Folded Spill
+; CHECK-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: .cfi_offset w19, -8
+; CHECK-NEXT: .cfi_offset w20, -16
+; CHECK-NEXT: .cfi_offset w21, -24
+; CHECK-NEXT: .cfi_offset w22, -32
+; CHECK-NEXT: .cfi_offset w23, -40
+; CHECK-NEXT: .cfi_offset w30, -48
+; CHECK-NEXT: mov w22, w5
+; CHECK-NEXT: mov x21, x4
+; CHECK-NEXT: mov x20, x3
+; CHECK-NEXT: mov x23, x2
+; CHECK-NEXT: mov w19, w1
+; CHECK-NEXT: b .LBB1_2
+; CHECK-NEXT: .LBB1_1: // %bb10
+; CHECK-NEXT: // in Loop: Header=BB1_2 Depth=1
+; CHECK-NEXT: mov w0, w22
+; CHECK-NEXT: mov x1, x20
+; CHECK-NEXT: str wzr, [x21]
+; CHECK-NEXT: bl foo
+; CHECK-NEXT: .LBB1_2: // %bb8
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: cmp w19, #33
+; CHECK-NEXT: b.gt .LBB1_6
+; CHECK-NEXT: // %bb.3: // %bb8
+; CHECK-NEXT: // in Loop: Header=BB1_2 Depth=1
+; CHECK-NEXT: cbz w19, .LBB1_2
+; CHECK-NEXT: // %bb.4: // %bb8
+; CHECK-NEXT: // in Loop: Header=BB1_2 Depth=1
+; CHECK-NEXT: cmp w19, #10
+; CHECK-NEXT: b.ne .LBB1_2
+; CHECK-NEXT: // %bb.5: // %bb9
+; CHECK-NEXT: // in Loop: Header=BB1_2 Depth=1
+; CHECK-NEXT: str wzr, [x23]
+; CHECK-NEXT: b .LBB1_2
+; CHECK-NEXT: .LBB1_6: // %bb8
+; CHECK-NEXT: // in Loop: Header=BB1_2 Depth=1
+; CHECK-NEXT: cmp w19, #34
+; CHECK-NEXT: b.eq .LBB1_1
+; CHECK-NEXT: // %bb.7: // %bb8
+; CHECK-NEXT: // in Loop: Header=BB1_2 Depth=1
+; CHECK-NEXT: cmp w19, #39
+; CHECK-NEXT: b.eq .LBB1_1
+; CHECK-NEXT: b .LBB1_2
+bb:
+ br label %bb7
+
+bb7: ; preds = %bb13, %bb
+ %phi = phi i32 [ 0, %bb ], [ %spec.select, %bb13 ]
+ br label %bb8
+
+bb8: ; preds = %bb10, %bb9, %bb8, %bb7
+ switch i32 %arg1, label %bb8 [
+ i32 10, label %bb9
+ i32 1, label %bb16
+ i32 0, label %bb13
+ i32 39, label %bb10
+ i32 34, label %bb10
+ ]
+
+bb9: ; preds = %bb8
+ store i32 0, ptr %arg2, align 4
+ br label %bb8
+
+bb10: ; preds = %bb8, %bb8
+ store i32 0, ptr %arg4, align 4
+ tail call void @foo(i32 %arg5, ptr %arg3)
+ br label %bb8
+
+bb13: ; preds = %bb8
+ %not.arg6 = xor i1 %arg6, true
+ %spec.select = zext i1 %not.arg6 to i32
+ br label %bb7
+
+bb16: ; preds = %bb8
+ unreachable
+}
More information about the llvm-commits
mailing list