[llvm] e6ada71 - [regalloc][basic] Change spill weight for optsize funcs (#112960)

Mon Oct 21 11:10:54 PDT 2024

Author: Ellis Hoag
Date: 2024-10-21T11:10:50-07:00
New Revision: e6ada7162e25ab28f6e588fba23f0c11dd1238b5

URL: https://github.com/llvm/llvm-project/commit/e6ada7162e25ab28f6e588fba23f0c11dd1238b5
DIFF: https://github.com/llvm/llvm-project/commit/e6ada7162e25ab28f6e588fba23f0c11dd1238b5.diff

LOG: [regalloc][basic] Change spill weight for optsize funcs (#112960)

Change the spill weight calculations for `optsize` functions to remove
the block frequency multiplier. For those functions, we do not want to
consider the runtime cost of spilling, only the codesize cost.

I built a large app once with the basic register allocator and once with
the greedy (default) register allocator.

| Regalloc Type | Uncompressed Size Delta | Compressed Size Delta |
| - | - | - |
| Basic | -303.8 KiB (-0.23%) | -232.0 KiB (-0.39%) |
| Greedy | 159.1 KiB (0.12%) | 130.1 KiB (0.22%) |

Since I only saw a size win with the basic register allocator, I decided
to only change the behavior for that type.

Added: 
    llvm/test/CodeGen/AArch64/regalloc-spill-weight-basic.ll

Modified: 
    llvm/include/llvm/CodeGen/CalcSpillWeights.h
    llvm/include/llvm/CodeGen/LiveIntervals.h
    llvm/lib/CodeGen/CalcSpillWeights.cpp
    llvm/lib/CodeGen/LiveIntervals.cpp
    llvm/lib/CodeGen/RegAllocBasic.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/CodeGen/CalcSpillWeights.h b/llvm/include/llvm/CodeGen/CalcSpillWeights.h
index 41b7f10cfc38ac..acb8b762efc643 100644

--- a/llvm/include/llvm/CodeGen/CalcSpillWeights.h
+++ b/llvm/include/llvm/CodeGen/CalcSpillWeights.h
@@ -18,6 +18,7 @@ class LiveIntervals;
 class MachineBlockFrequencyInfo;
 class MachineFunction;
 class MachineLoopInfo;
+class ProfileSummaryInfo;
 class VirtRegMap;
 
   /// Normalize the spill weight of a live interval
@@ -47,6 +48,7 @@ class VirtRegMap;
     LiveIntervals &LIS;
     const VirtRegMap &VRM;
     const MachineLoopInfo &Loops;
+    ProfileSummaryInfo *PSI;
     const MachineBlockFrequencyInfo &MBFI;
 
     /// Returns true if Reg of live interval LI is used in instruction with many
@@ -56,8 +58,9 @@ class VirtRegMap;
   public:
     VirtRegAuxInfo(MachineFunction &MF, LiveIntervals &LIS,
                    const VirtRegMap &VRM, const MachineLoopInfo &Loops,
-                   const MachineBlockFrequencyInfo &MBFI)
-        : MF(MF), LIS(LIS), VRM(VRM), Loops(Loops), MBFI(MBFI) {}
+                   const MachineBlockFrequencyInfo &MBFI,
+                   ProfileSummaryInfo *PSI = nullptr)
+        : MF(MF), LIS(LIS), VRM(VRM), Loops(Loops), PSI(PSI), MBFI(MBFI) {}
 
     virtual ~VirtRegAuxInfo() = default;
 

diff  --git a/llvm/include/llvm/CodeGen/LiveIntervals.h b/llvm/include/llvm/CodeGen/LiveIntervals.h
index 4c45a9676d6bd1..161bb247a0e968 100644
--- a/llvm/include/llvm/CodeGen/LiveIntervals.h
+++ b/llvm/include/llvm/CodeGen/LiveIntervals.h
@@ -47,6 +47,7 @@ class MachineDominatorTree;
 class MachineFunction;
 class MachineInstr;
 class MachineRegisterInfo;
+class ProfileSummaryInfo;
 class raw_ostream;
 class TargetInstrInfo;
 class VirtRegMap;
@@ -113,14 +114,18 @@ class LiveIntervals {
   ~LiveIntervals();
 
   /// Calculate the spill weight to assign to a single instruction.
+  /// If \p PSI is provided the calculation is altered for optsize functions.
   static float getSpillWeight(bool isDef, bool isUse,
                               const MachineBlockFrequencyInfo *MBFI,
-                              const MachineInstr &MI);
+                              const MachineInstr &MI,
+                              ProfileSummaryInfo *PSI = nullptr);
 
   /// Calculate the spill weight to assign to a single instruction.
+  /// If \p PSI is provided the calculation is altered for optsize functions.
   static float getSpillWeight(bool isDef, bool isUse,
                               const MachineBlockFrequencyInfo *MBFI,
-                              const MachineBasicBlock *MBB);
+                              const MachineBasicBlock *MBB,
+                              ProfileSummaryInfo *PSI = nullptr);
 
   LiveInterval &getInterval(Register Reg) {
     if (hasInterval(Reg))

diff  --git a/llvm/lib/CodeGen/CalcSpillWeights.cpp b/llvm/lib/CodeGen/CalcSpillWeights.cpp
index 88ed2291313c95..f361c956092e88 100644
--- a/llvm/lib/CodeGen/CalcSpillWeights.cpp
+++ b/llvm/lib/CodeGen/CalcSpillWeights.cpp
@@ -199,8 +199,10 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start,
     // localLI = COPY other
     // ...
     // other   = COPY localLI
-    TotalWeight += LiveIntervals::getSpillWeight(true, false, &MBFI, LocalMBB);
-    TotalWeight += LiveIntervals::getSpillWeight(false, true, &MBFI, LocalMBB);
+    TotalWeight +=
+        LiveIntervals::getSpillWeight(true, false, &MBFI, LocalMBB, PSI);
+    TotalWeight +=
+        LiveIntervals::getSpillWeight(false, true, &MBFI, LocalMBB, PSI);
 
     NumInstr += 2;
   }
@@ -272,7 +274,7 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start,
       // Calculate instr weight.
       bool Reads, Writes;
       std::tie(Reads, Writes) = MI->readsWritesVirtualRegister(LI.reg());
-      Weight = LiveIntervals::getSpillWeight(Writes, Reads, &MBFI, *MI);
+      Weight = LiveIntervals::getSpillWeight(Writes, Reads, &MBFI, *MI, PSI);
 
       // Give extra weight to what looks like a loop induction variable update.
       if (Writes && IsExiting && LIS.isLiveOutOfMBB(LI, MBB))

diff  --git a/llvm/lib/CodeGen/LiveIntervals.cpp b/llvm/lib/CodeGen/LiveIntervals.cpp
index 7ddaaaa915ef17..21a316cf99a217 100644
--- a/llvm/lib/CodeGen/LiveIntervals.cpp
+++ b/llvm/lib/CodeGen/LiveIntervals.cpp
@@ -30,6 +30,7 @@
 #include "llvm/CodeGen/MachineInstrBundle.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineSizeOpts.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/SlotIndexes.h"
 #include "llvm/CodeGen/StackMaps.h"
@@ -37,6 +38,7 @@
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/CodeGen/VirtRegMap.h"
 #include "llvm/Config/llvm-config.h"
+#include "llvm/IR/ProfileSummary.h"
 #include "llvm/IR/Statepoint.h"
 #include "llvm/MC/LaneBitmask.h"
 #include "llvm/MC/MCRegisterInfo.h"
@@ -875,14 +877,23 @@ LiveIntervals::hasPHIKill(const LiveInterval &LI, const VNInfo *VNI) const {
 
 float LiveIntervals::getSpillWeight(bool isDef, bool isUse,
                                     const MachineBlockFrequencyInfo *MBFI,
-                                    const MachineInstr &MI) {
-  return getSpillWeight(isDef, isUse, MBFI, MI.getParent());
+                                    const MachineInstr &MI,
+                                    ProfileSummaryInfo *PSI) {
+  return getSpillWeight(isDef, isUse, MBFI, MI.getParent(), PSI);
 }
 
 float LiveIntervals::getSpillWeight(bool isDef, bool isUse,
                                     const MachineBlockFrequencyInfo *MBFI,
-                                    const MachineBasicBlock *MBB) {
-  return (isDef + isUse) * MBFI->getBlockFreqRelativeToEntryBlock(MBB);
+                                    const MachineBasicBlock *MBB,
+                                    ProfileSummaryInfo *PSI) {
+  float Weight = isDef + isUse;
+  const auto *MF = MBB->getParent();
+  // When optimizing for size we only consider the codesize impact of spilling
+  // the register, not the runtime impact.
+  if (PSI && (MF->getFunction().hasOptSize() ||
+              llvm::shouldOptimizeForSize(MF, PSI, MBFI)))
+    return Weight;
+  return Weight * MBFI->getBlockFreqRelativeToEntryBlock(MBB);
 }
 
 LiveRange::Segment

diff  --git a/llvm/lib/CodeGen/RegAllocBasic.cpp b/llvm/lib/CodeGen/RegAllocBasic.cpp
index caf9c32a5a3498..046784c386e301 100644
--- a/llvm/lib/CodeGen/RegAllocBasic.cpp
+++ b/llvm/lib/CodeGen/RegAllocBasic.cpp
@@ -14,6 +14,7 @@
 #include "AllocationOrder.h"
 #include "RegAllocBase.h"
 #include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
 #include "llvm/CodeGen/CalcSpillWeights.h"
 #include "llvm/CodeGen/LiveDebugVariables.h"
 #include "llvm/CodeGen/LiveIntervals.h"
@@ -140,6 +141,7 @@ INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
 INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
 INITIALIZE_PASS_END(RABasic, "regallocbasic", "Basic Register Allocator", false,
                     false)
 
@@ -182,6 +184,7 @@ void RABasic::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addPreserved<LiveDebugVariables>();
   AU.addRequired<LiveStacks>();
   AU.addPreserved<LiveStacks>();
+  AU.addRequired<ProfileSummaryInfoWrapperPass>();
   AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
   AU.addPreserved<MachineBlockFrequencyInfoWrapperPass>();
   AU.addRequiredID(MachineDominatorsID);
@@ -312,7 +315,8 @@ bool RABasic::runOnMachineFunction(MachineFunction &mf) {
                      getAnalysis<LiveRegMatrix>());
   VirtRegAuxInfo VRAI(
       *MF, *LIS, *VRM, getAnalysis<MachineLoopInfoWrapperPass>().getLI(),
-      getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI());
+      getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI(),
+      &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI());
   VRAI.calculateSpillWeightsAndHints();
 
   SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM, VRAI));

diff  --git a/llvm/test/CodeGen/AArch64/regalloc-spill-weight-basic.ll b/llvm/test/CodeGen/AArch64/regalloc-spill-weight-basic.ll
new file mode 100644
index 00000000000000..5c3bd984087ec1
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/regalloc-spill-weight-basic.ll
@@ -0,0 +1,168 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+
+; RUN: llc < %s -mtriple=aarch64 -regalloc=basic | FileCheck %s
+
+; Test that the register allocator behaves differently with minsize functions.
+
+declare void @foo(i32, ptr)
+
+define void @optsize(i32 %arg, i32 %arg1, ptr %arg2, ptr %arg3, ptr %arg4, i32 %arg5, i1 %arg6) minsize {
+; CHECK-LABEL: optsize:
+; CHECK:       // %bb.0: // %bb
+; CHECK-NEXT:    stp x30, x23, [sp, #-48]! // 16-byte Folded Spill
+; CHECK-NEXT:    stp x22, x21, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x20, x19, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    .cfi_offset w19, -8
+; CHECK-NEXT:    .cfi_offset w20, -16
+; CHECK-NEXT:    .cfi_offset w21, -24
+; CHECK-NEXT:    .cfi_offset w22, -32
+; CHECK-NEXT:    .cfi_offset w23, -40
+; CHECK-NEXT:    .cfi_offset w30, -48
+; CHECK-NEXT:    mov w23, w5
+; CHECK-NEXT:    mov x22, x4
+; CHECK-NEXT:    mov x21, x3
+; CHECK-NEXT:    mov x20, x2
+; CHECK-NEXT:    mov w19, w1
+; CHECK-NEXT:  .LBB0_1: // %bb8
+; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    cbz w19, .LBB0_1
+; CHECK-NEXT:  // %bb.2: // %bb8
+; CHECK-NEXT:    // in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT:    cmp w19, #39
+; CHECK-NEXT:    b.eq .LBB0_6
+; CHECK-NEXT:  // %bb.3: // %bb8
+; CHECK-NEXT:    // in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT:    cmp w19, #34
+; CHECK-NEXT:    b.eq .LBB0_6
+; CHECK-NEXT:  // %bb.4: // %bb8
+; CHECK-NEXT:    // in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT:    cmp w19, #10
+; CHECK-NEXT:    b.ne .LBB0_1
+; CHECK-NEXT:  // %bb.5: // %bb9
+; CHECK-NEXT:    // in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT:    str wzr, [x20]
+; CHECK-NEXT:    b .LBB0_1
+; CHECK-NEXT:  .LBB0_6: // %bb10
+; CHECK-NEXT:    // in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT:    mov w0, w23
+; CHECK-NEXT:    mov x1, x21
+; CHECK-NEXT:    str wzr, [x22]
+; CHECK-NEXT:    bl foo
+; CHECK-NEXT:    b .LBB0_1
+bb:
+  br label %bb7
+
+bb7:                                              ; preds = %bb13, %bb
+  %phi = phi i32 [ 0, %bb ], [ %spec.select, %bb13 ]
+  br label %bb8
+
+bb8:                                              ; preds = %bb10, %bb9, %bb8, %bb7
+  switch i32 %arg1, label %bb8 [
+    i32 10, label %bb9
+    i32 1, label %bb16
+    i32 0, label %bb13
+    i32 39, label %bb10
+    i32 34, label %bb10
+  ]
+
+bb9:                                              ; preds = %bb8
+  store i32 0, ptr %arg2, align 4
+  br label %bb8
+
+bb10:                                             ; preds = %bb8, %bb8
+  store i32 0, ptr %arg4, align 4
+  tail call void @foo(i32 %arg5, ptr %arg3)
+  br label %bb8
+
+bb13:                                             ; preds = %bb8
+  %not.arg6 = xor i1 %arg6, true
+  %spec.select = zext i1 %not.arg6 to i32
+  br label %bb7
+
+bb16:                                             ; preds = %bb8
+  unreachable
+}
+
+define void @optspeed(i32 %arg, i32 %arg1, ptr %arg2, ptr %arg3, ptr %arg4, i32 %arg5, i1 %arg6) {
+; CHECK-LABEL: optspeed:
+; CHECK:       // %bb.0: // %bb
+; CHECK-NEXT:    stp x30, x23, [sp, #-48]! // 16-byte Folded Spill
+; CHECK-NEXT:    stp x22, x21, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x20, x19, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    .cfi_offset w19, -8
+; CHECK-NEXT:    .cfi_offset w20, -16
+; CHECK-NEXT:    .cfi_offset w21, -24
+; CHECK-NEXT:    .cfi_offset w22, -32
+; CHECK-NEXT:    .cfi_offset w23, -40
+; CHECK-NEXT:    .cfi_offset w30, -48
+; CHECK-NEXT:    mov w22, w5
+; CHECK-NEXT:    mov x21, x4
+; CHECK-NEXT:    mov x20, x3
+; CHECK-NEXT:    mov x23, x2
+; CHECK-NEXT:    mov w19, w1
+; CHECK-NEXT:    b .LBB1_2
+; CHECK-NEXT:  .LBB1_1: // %bb10
+; CHECK-NEXT:    // in Loop: Header=BB1_2 Depth=1
+; CHECK-NEXT:    mov w0, w22
+; CHECK-NEXT:    mov x1, x20
+; CHECK-NEXT:    str wzr, [x21]
+; CHECK-NEXT:    bl foo
+; CHECK-NEXT:  .LBB1_2: // %bb8
+; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    cmp w19, #33
+; CHECK-NEXT:    b.gt .LBB1_6
+; CHECK-NEXT:  // %bb.3: // %bb8
+; CHECK-NEXT:    // in Loop: Header=BB1_2 Depth=1
+; CHECK-NEXT:    cbz w19, .LBB1_2
+; CHECK-NEXT:  // %bb.4: // %bb8
+; CHECK-NEXT:    // in Loop: Header=BB1_2 Depth=1
+; CHECK-NEXT:    cmp w19, #10
+; CHECK-NEXT:    b.ne .LBB1_2
+; CHECK-NEXT:  // %bb.5: // %bb9
+; CHECK-NEXT:    // in Loop: Header=BB1_2 Depth=1
+; CHECK-NEXT:    str wzr, [x23]
+; CHECK-NEXT:    b .LBB1_2
+; CHECK-NEXT:  .LBB1_6: // %bb8
+; CHECK-NEXT:    // in Loop: Header=BB1_2 Depth=1
+; CHECK-NEXT:    cmp w19, #34
+; CHECK-NEXT:    b.eq .LBB1_1
+; CHECK-NEXT:  // %bb.7: // %bb8
+; CHECK-NEXT:    // in Loop: Header=BB1_2 Depth=1
+; CHECK-NEXT:    cmp w19, #39
+; CHECK-NEXT:    b.eq .LBB1_1
+; CHECK-NEXT:    b .LBB1_2
+bb:
+  br label %bb7
+
+bb7:                                              ; preds = %bb13, %bb
+  %phi = phi i32 [ 0, %bb ], [ %spec.select, %bb13 ]
+  br label %bb8
+
+bb8:                                              ; preds = %bb10, %bb9, %bb8, %bb7
+  switch i32 %arg1, label %bb8 [
+    i32 10, label %bb9
+    i32 1, label %bb16
+    i32 0, label %bb13
+    i32 39, label %bb10
+    i32 34, label %bb10
+  ]
+
+bb9:                                              ; preds = %bb8
+  store i32 0, ptr %arg2, align 4
+  br label %bb8
+
+bb10:                                             ; preds = %bb8, %bb8
+  store i32 0, ptr %arg4, align 4
+  tail call void @foo(i32 %arg5, ptr %arg3)
+  br label %bb8
+
+bb13:                                             ; preds = %bb8
+  %not.arg6 = xor i1 %arg6, true
+  %spec.select = zext i1 %not.arg6 to i32
+  br label %bb7
+
+bb16:                                             ; preds = %bb8
+  unreachable
+}