[llvm] [RegAlloc] Scale the spill weight by target factor (PR #113675)
Luke Lau via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 12 01:10:49 PDT 2025
https://github.com/lukel97 approved this pull request.
I added some statistics to count the total LMUL that is statically spilled and reloaded, using the following patch:
```diff
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 2fdf6bd36e88..f4b5a5b29e71 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -18,6 +18,7 @@
#include "RISCVSubtarget.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/LiveIntervals.h"
@@ -43,6 +44,10 @@ using namespace llvm;
#define GET_INSTRINFO_NAMED_OPS
#include "RISCVGenInstrInfo.inc"
+#define DEBUG_TYPE "riscv-instr-info"
+STATISTIC(TotalLMULSpilled, "Total LMUL spilled");
+STATISTIC(TotalLMULReloaded, "Total LMUL reloaded");
+
static cl::opt<bool> PreferWholeRegisterMove(
"riscv-prefer-whole-register-move", cl::init(false), cl::Hidden,
cl::desc("Prefer whole register move for vector registers."));
@@ -615,12 +620,16 @@ void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
IsScalableVector = false;
} else if (RISCV::VRRegClass.hasSubClassEq(RC)) {
Opcode = RISCV::VS1R_V;
+ TotalLMULSpilled += 1;
} else if (RISCV::VRM2RegClass.hasSubClassEq(RC)) {
Opcode = RISCV::VS2R_V;
+ TotalLMULSpilled += 2;
} else if (RISCV::VRM4RegClass.hasSubClassEq(RC)) {
Opcode = RISCV::VS4R_V;
+ TotalLMULSpilled += 4;
} else if (RISCV::VRM8RegClass.hasSubClassEq(RC)) {
Opcode = RISCV::VS8R_V;
+ TotalLMULSpilled += 8;
} else if (RISCV::VRN2M1RegClass.hasSubClassEq(RC))
Opcode = RISCV::PseudoVSPILL2_M1;
else if (RISCV::VRN2M2RegClass.hasSubClassEq(RC))
@@ -706,12 +715,16 @@ void RISCVInstrInfo::loadRegFromStackSlot(
IsScalableVector = false;
} else if (RISCV::VRRegClass.hasSubClassEq(RC)) {
Opcode = RISCV::VL1RE8_V;
+ TotalLMULReloaded += 1;
} else if (RISCV::VRM2RegClass.hasSubClassEq(RC)) {
Opcode = RISCV::VL2RE8_V;
+ TotalLMULReloaded += 2;
} else if (RISCV::VRM4RegClass.hasSubClassEq(RC)) {
Opcode = RISCV::VL4RE8_V;
+ TotalLMULReloaded += 4;
} else if (RISCV::VRM8RegClass.hasSubClassEq(RC)) {
Opcode = RISCV::VL8RE8_V;
+ TotalLMULReloaded += 8;
} else if (RISCV::VRN2M1RegClass.hasSubClassEq(RC))
Opcode = RISCV::PseudoVRELOAD2_M1;
else if (RISCV::VRN2M2RegClass.hasSubClassEq(RC))
```
Running it against SPEC CPU 2017 with -O3 -march=rva23u64, it looks like this PR helps a good bit on imagick/gcc/deepsjeng!
| ('Program', '') | ('riscv-instr-info.TotalLMULReloaded', 'lhs') | ('riscv-instr-info.TotalLMULReloaded', 'rhs') | ('riscv-instr-info.TotalLMULReloaded', 'diff') | ('riscv-instr-info.TotalLMULSpilled', 'lhs') | ('riscv-instr-info.TotalLMULSpilled', 'rhs') | ('riscv-instr-info.TotalLMULSpilled', 'diff') |
|:--------------------------------------------|------------------------------------------------:|------------------------------------------------:|-------------------------------------------------:|-----------------------------------------------:|-----------------------------------------------:|------------------------------------------------:|
| FP2017rate/508.namd_r/508.namd_r | 6 | 6 | 0 | 1 | 1 | 0 |
| INT2017spe...ed/620.omnetpp_s/620.omnetpp_s | 5 | 5 | 0 | 4 | 4 | 0 |
| INT2017spe...00.perlbench_s/600.perlbench_s | 8 | 8 | 0 | 4 | 4 | 0 |
| INT2017speed/625.x264_s/625.x264_s | 43 | 43 | 0 | 47 | 47 | 0 |
| INT2017rate/525.x264_r/525.x264_r | 43 | 43 | 0 | 47 | 47 | 0 |
| INT2017rat...23.xalancbmk_r/523.xalancbmk_r | 6 | 6 | 0 | 6 | 6 | 0 |
| INT2017rate/520.omnetpp_r/520.omnetpp_r | 5 | 5 | 0 | 4 | 4 | 0 |
| INT2017rat...00.perlbench_r/500.perlbench_r | 8 | 8 | 0 | 4 | 4 | 0 |
| FP2017speed/644.nab_s/644.nab_s | 25 | 25 | 0 | 25 | 25 | 0 |
| FP2017speed/619.lbm_s/619.lbm_s | 42 | 42 | 0 | 42 | 42 | 0 |
| FP2017rate/544.nab_r/544.nab_r | 25 | 25 | 0 | 25 | 25 | 0 |
| FP2017rate/519.lbm_r/519.lbm_r | 42 | 42 | 0 | 42 | 42 | 0 |
| FP2017rate/511.povray_r/511.povray_r | 122 | 122 | 0 | 66 | 66 | 0 |
| INT2017spe...23.xalancbmk_s/623.xalancbmk_s | 6 | 6 | 0 | 6 | 6 | 0 |
| FP2017speed/638.imagick_s/638.imagick_s | 5054 | 5053 | -0.000197863 | 4433 | 3803 | -0.142116 |
| FP2017rate/538.imagick_r/538.imagick_r | 5054 | 5053 | -0.000197863 | 4433 | 3803 | -0.142116 |
| FP2017rate/510.parest_r/510.parest_r | 1349 | 1343 | -0.00444774 | 1089 | 1083 | -0.00550964 |
| FP2017rate/526.blender_r/526.blender_r | 1138 | 1127 | -0.00966608 | 1064 | 1025 | -0.0366541 |
| INT2017spe...31.deepsjeng_s/631.deepsjeng_s | 284 | 274 | -0.0352113 | 154 | 132 | -0.142857 |
| INT2017rat...31.deepsjeng_r/531.deepsjeng_r | 284 | 274 | -0.0352113 | 154 | 132 | -0.142857 |
| INT2017speed/602.gcc_s/602.gcc_s | 113 | 83 | -0.265487 | 107 | 77 | -0.280374 |
| INT2017rate/502.gcc_r/502.gcc_r | 113 | 83 | -0.265487 | 107 | 77 | -0.280374 |
| INT2017rate/505.mcf_r/505.mcf_r | 0 | 0 | nan | nan | nan | nan |
| INT2017rate/541.leela_r/541.leela_r | 0 | 0 | nan | nan | nan | nan |
| INT2017rate/557.xz_r/557.xz_r | 0 | 0 | nan | nan | nan | nan |
| INT2017speed/605.mcf_s/605.mcf_s | 0 | 0 | nan | nan | nan | nan |
| INT2017speed/641.leela_s/641.leela_s | 0 | 0 | nan | nan | nan | nan |
| INT2017speed/657.xz_s/657.xz_s | 0 | 0 | nan | nan | nan | nan |
| Geomean difference | nan | nan | -0.0314649 | nan | nan | -0.0580552 |
I think I was previously skeptical because I only measured the absolute number of spills/reloads, which didn't show much of a change:
| ('Program', '') | ('regalloc.NumSpills', 'lhs') | ('regalloc.NumSpills', 'rhs') | ('regalloc.NumSpills', 'diff') | ('regalloc.NumReloads', 'lhs') | ('regalloc.NumReloads', 'rhs') | ('regalloc.NumReloads', 'diff') |
|:--------------------------------------------|--------------------------------:|--------------------------------:|---------------------------------:|---------------------------------:|---------------------------------:|----------------------------------:|
| FP2017rate/526.blender_r/526.blender_r | 13411 | 13430 | 0.00141675 | 27478 | 27509 | 0.00112818 |
| INT2017speed/602.gcc_s/602.gcc_s | 11376 | 11381 | 0.000439522 | 25795 | 25800 | 0.000193836 |
| INT2017rate/502.gcc_r/502.gcc_r | 11376 | 11381 | 0.000439522 | 25795 | 25800 | 0.000193836 |
| FP2017rate/508.namd_r/508.namd_r | 6729 | 6729 | 0 | 16370 | 16370 | 0 |
| FP2017rate/510.parest_r/510.parest_r | 44293 | 44293 | 0 | 87404 | 87404 | 0 |
| INT2017speed/641.leela_s/641.leela_s | 310 | 310 | 0 | 449 | 449 | 0 |
| INT2017speed/625.x264_s/625.x264_s | 2147 | 2147 | 0 | 4598 | 4598 | 0 |
| INT2017spe...23.xalancbmk_s/623.xalancbmk_s | 1822 | 1822 | 0 | 2969 | 2969 | 0 |
| INT2017spe...ed/620.omnetpp_s/620.omnetpp_s | 719 | 719 | 0 | 1210 | 1210 | 0 |
| INT2017speed/605.mcf_s/605.mcf_s | 123 | 123 | 0 | 372 | 372 | 0 |
| INT2017spe...00.perlbench_s/600.perlbench_s | 4375 | 4375 | 0 | 9740 | 9740 | 0 |
| INT2017rate/557.xz_r/557.xz_r | 300 | 300 | 0 | 603 | 603 | 0 |
| INT2017rate/541.leela_r/541.leela_r | 310 | 310 | 0 | 449 | 449 | 0 |
| INT2017rate/525.x264_r/525.x264_r | 2147 | 2147 | 0 | 4598 | 4598 | 0 |
| INT2017rat...23.xalancbmk_r/523.xalancbmk_r | 1822 | 1822 | 0 | 2969 | 2969 | 0 |
| INT2017rate/520.omnetpp_r/520.omnetpp_r | 719 | 719 | 0 | 1210 | 1210 | 0 |
| INT2017rate/505.mcf_r/505.mcf_r | 123 | 123 | 0 | 372 | 372 | 0 |
| INT2017rat...00.perlbench_r/500.perlbench_r | 4375 | 4375 | 0 | 9740 | 9740 | 0 |
| FP2017speed/644.nab_s/644.nab_s | 713 | 713 | 0 | 1066 | 1066 | 0 |
| FP2017speed/619.lbm_s/619.lbm_s | 88 | 88 | 0 | 90 | 90 | 0 |
| FP2017rate/544.nab_r/544.nab_r | 713 | 713 | 0 | 1066 | 1066 | 0 |
| FP2017rate/519.lbm_r/519.lbm_r | 90 | 90 | 0 | 92 | 92 | 0 |
| FP2017rate/511.povray_r/511.povray_r | 1571 | 1571 | 0 | 3043 | 3043 | 0 |
| INT2017speed/657.xz_s/657.xz_s | 300 | 300 | 0 | 603 | 603 | 0 |
| FP2017speed/638.imagick_s/638.imagick_s | 4074 | 4054 | -0.00490918 | 10335 | 10452 | 0.0113208 |
| FP2017rate/538.imagick_r/538.imagick_r | 4074 | 4054 | -0.00490918 | 10335 | 10452 | 0.0113208 |
| INT2017rat...31.deepsjeng_r/531.deepsjeng_r | 344 | 341 | -0.00872093 | 690 | 691 | 0.00144928 |
| INT2017spe...31.deepsjeng_s/631.deepsjeng_s | 344 | 341 | -0.00872093 | 690 | 691 | 0.00144928 |
| Geomean difference | nan | nan | -0.000894825 | nan | nan | 0.000962103 |
So I guess that answers my worry that increasing the LMUL 8 spill weights might increase spills at smaller LMULs, i.e. it looks like it doesn't!
Thanks for being patient on this for so long, the code changes + test diff LGTM :)
https://github.com/llvm/llvm-project/pull/113675
More information about the llvm-commits mailing list