[llvm] [RegAlloc] Scale the spill weight by target factor (PR #113675)

Luke Lau via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 12 01:10:49 PDT 2025


https://github.com/lukel97 approved this pull request.

I added some statistics to collect the total LMUL that is statically spilled and reloaded with this change:

```diff
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 2fdf6bd36e88..f4b5a5b29e71 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -18,6 +18,7 @@
 #include "RISCVSubtarget.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/MemoryLocation.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/CodeGen/LiveIntervals.h"
@@ -43,6 +44,10 @@ using namespace llvm;
 #define GET_INSTRINFO_NAMED_OPS
 #include "RISCVGenInstrInfo.inc"
 
+#define DEBUG_TYPE "riscv-instr-info"
+STATISTIC(TotalLMULSpilled, "Total LMUL spilled");
+STATISTIC(TotalLMULReloaded, "Total LMUL reloaded");
+
 static cl::opt<bool> PreferWholeRegisterMove(
     "riscv-prefer-whole-register-move", cl::init(false), cl::Hidden,
     cl::desc("Prefer whole register move for vector registers."));
@@ -615,12 +620,16 @@ void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
     IsScalableVector = false;
   } else if (RISCV::VRRegClass.hasSubClassEq(RC)) {
     Opcode = RISCV::VS1R_V;
+    TotalLMULSpilled += 1;
   } else if (RISCV::VRM2RegClass.hasSubClassEq(RC)) {
     Opcode = RISCV::VS2R_V;
+    TotalLMULSpilled += 2;
   } else if (RISCV::VRM4RegClass.hasSubClassEq(RC)) {
     Opcode = RISCV::VS4R_V;
+    TotalLMULSpilled += 4;
   } else if (RISCV::VRM8RegClass.hasSubClassEq(RC)) {
     Opcode = RISCV::VS8R_V;
+    TotalLMULSpilled += 8;
   } else if (RISCV::VRN2M1RegClass.hasSubClassEq(RC))
     Opcode = RISCV::PseudoVSPILL2_M1;
   else if (RISCV::VRN2M2RegClass.hasSubClassEq(RC))
@@ -706,12 +715,16 @@ void RISCVInstrInfo::loadRegFromStackSlot(
     IsScalableVector = false;
   } else if (RISCV::VRRegClass.hasSubClassEq(RC)) {
     Opcode = RISCV::VL1RE8_V;
+    TotalLMULReloaded += 1;
   } else if (RISCV::VRM2RegClass.hasSubClassEq(RC)) {
     Opcode = RISCV::VL2RE8_V;
+    TotalLMULReloaded += 2;
   } else if (RISCV::VRM4RegClass.hasSubClassEq(RC)) {
     Opcode = RISCV::VL4RE8_V;
+    TotalLMULReloaded += 4;
   } else if (RISCV::VRM8RegClass.hasSubClassEq(RC)) {
     Opcode = RISCV::VL8RE8_V;
+    TotalLMULReloaded += 8;
   } else if (RISCV::VRN2M1RegClass.hasSubClassEq(RC))
     Opcode = RISCV::PseudoVRELOAD2_M1;
   else if (RISCV::VRN2M2RegClass.hasSubClassEq(RC))
```

Running it against SPEC CPU 2017 with -O3 -march=rva23u64, it looks like this helps a good bit on imagick/gcc/deepsjeng!

| ('Program', '')                             |   ('riscv-instr-info.TotalLMULReloaded', 'lhs') |   ('riscv-instr-info.TotalLMULReloaded', 'rhs') |   ('riscv-instr-info.TotalLMULReloaded', 'diff') |   ('riscv-instr-info.TotalLMULSpilled', 'lhs') |   ('riscv-instr-info.TotalLMULSpilled', 'rhs') |   ('riscv-instr-info.TotalLMULSpilled', 'diff') |
|:--------------------------------------------|------------------------------------------------:|------------------------------------------------:|-------------------------------------------------:|-----------------------------------------------:|-----------------------------------------------:|------------------------------------------------:|
| FP2017rate/508.namd_r/508.namd_r            |                                               6 |                                               6 |                                      0           |                                              1 |                                              1 |                                      0          |
| INT2017spe...ed/620.omnetpp_s/620.omnetpp_s |                                               5 |                                               5 |                                      0           |                                              4 |                                              4 |                                      0          |
| INT2017spe...00.perlbench_s/600.perlbench_s |                                               8 |                                               8 |                                      0           |                                              4 |                                              4 |                                      0          |
| INT2017speed/625.x264_s/625.x264_s          |                                              43 |                                              43 |                                      0           |                                             47 |                                             47 |                                      0          |
| INT2017rate/525.x264_r/525.x264_r           |                                              43 |                                              43 |                                      0           |                                             47 |                                             47 |                                      0          |
| INT2017rat...23.xalancbmk_r/523.xalancbmk_r |                                               6 |                                               6 |                                      0           |                                              6 |                                              6 |                                      0          |
| INT2017rate/520.omnetpp_r/520.omnetpp_r     |                                               5 |                                               5 |                                      0           |                                              4 |                                              4 |                                      0          |
| INT2017rat...00.perlbench_r/500.perlbench_r |                                               8 |                                               8 |                                      0           |                                              4 |                                              4 |                                      0          |
| FP2017speed/644.nab_s/644.nab_s             |                                              25 |                                              25 |                                      0           |                                             25 |                                             25 |                                      0          |
| FP2017speed/619.lbm_s/619.lbm_s             |                                              42 |                                              42 |                                      0           |                                             42 |                                             42 |                                      0          |
| FP2017rate/544.nab_r/544.nab_r              |                                              25 |                                              25 |                                      0           |                                             25 |                                             25 |                                      0          |
| FP2017rate/519.lbm_r/519.lbm_r              |                                              42 |                                              42 |                                      0           |                                             42 |                                             42 |                                      0          |
| FP2017rate/511.povray_r/511.povray_r        |                                             122 |                                             122 |                                      0           |                                             66 |                                             66 |                                      0          |
| INT2017spe...23.xalancbmk_s/623.xalancbmk_s |                                               6 |                                               6 |                                      0           |                                              6 |                                              6 |                                      0          |
| FP2017speed/638.imagick_s/638.imagick_s     |                                            5054 |                                            5053 |                                     -0.000197863 |                                           4433 |                                           3803 |                                     -0.142116   |
| FP2017rate/538.imagick_r/538.imagick_r      |                                            5054 |                                            5053 |                                     -0.000197863 |                                           4433 |                                           3803 |                                     -0.142116   |
| FP2017rate/510.parest_r/510.parest_r        |                                            1349 |                                            1343 |                                     -0.00444774  |                                           1089 |                                           1083 |                                     -0.00550964 |
| FP2017rate/526.blender_r/526.blender_r      |                                            1138 |                                            1127 |                                     -0.00966608  |                                           1064 |                                           1025 |                                     -0.0366541  |
| INT2017spe...31.deepsjeng_s/631.deepsjeng_s |                                             284 |                                             274 |                                     -0.0352113   |                                            154 |                                            132 |                                     -0.142857   |
| INT2017rat...31.deepsjeng_r/531.deepsjeng_r |                                             284 |                                             274 |                                     -0.0352113   |                                            154 |                                            132 |                                     -0.142857   |
| INT2017speed/602.gcc_s/602.gcc_s            |                                             113 |                                              83 |                                     -0.265487    |                                            107 |                                             77 |                                     -0.280374   |
| INT2017rate/502.gcc_r/502.gcc_r             |                                             113 |                                              83 |                                     -0.265487    |                                            107 |                                             77 |                                     -0.280374   |
| INT2017rate/505.mcf_r/505.mcf_r             |                                               0 |                                               0 |                                    nan           |                                            nan |                                            nan |                                    nan          |
| INT2017rate/541.leela_r/541.leela_r         |                                               0 |                                               0 |                                    nan           |                                            nan |                                            nan |                                    nan          |
| INT2017rate/557.xz_r/557.xz_r               |                                               0 |                                               0 |                                    nan           |                                            nan |                                            nan |                                    nan          |
| INT2017speed/605.mcf_s/605.mcf_s            |                                               0 |                                               0 |                                    nan           |                                            nan |                                            nan |                                    nan          |
| INT2017speed/641.leela_s/641.leela_s        |                                               0 |                                               0 |                                    nan           |                                            nan |                                            nan |                                    nan          |
| INT2017speed/657.xz_s/657.xz_s              |                                               0 |                                               0 |                                    nan           |                                            nan |                                            nan |                                    nan          |
| Geomean difference                          |                                             nan |                                             nan |                                     -0.0314649   |                                            nan |                                            nan |                                     -0.0580552  |

I think I was previously skeptical because I only measured the absolute number of spills/reloads, which didn't show much of a change:

| ('Program', '')                             |   ('regalloc.NumSpills', 'lhs') |   ('regalloc.NumSpills', 'rhs') |   ('regalloc.NumSpills', 'diff') |   ('regalloc.NumReloads', 'lhs') |   ('regalloc.NumReloads', 'rhs') |   ('regalloc.NumReloads', 'diff') |
|:--------------------------------------------|--------------------------------:|--------------------------------:|---------------------------------:|---------------------------------:|---------------------------------:|----------------------------------:|
| FP2017rate/526.blender_r/526.blender_r      |                           13411 |                           13430 |                      0.00141675  |                            27478 |                            27509 |                       0.00112818  |
| INT2017speed/602.gcc_s/602.gcc_s            |                           11376 |                           11381 |                      0.000439522 |                            25795 |                            25800 |                       0.000193836 |
| INT2017rate/502.gcc_r/502.gcc_r             |                           11376 |                           11381 |                      0.000439522 |                            25795 |                            25800 |                       0.000193836 |
| FP2017rate/508.namd_r/508.namd_r            |                            6729 |                            6729 |                      0           |                            16370 |                            16370 |                       0           |
| FP2017rate/510.parest_r/510.parest_r        |                           44293 |                           44293 |                      0           |                            87404 |                            87404 |                       0           |
| INT2017speed/641.leela_s/641.leela_s        |                             310 |                             310 |                      0           |                              449 |                              449 |                       0           |
| INT2017speed/625.x264_s/625.x264_s          |                            2147 |                            2147 |                      0           |                             4598 |                             4598 |                       0           |
| INT2017spe...23.xalancbmk_s/623.xalancbmk_s |                            1822 |                            1822 |                      0           |                             2969 |                             2969 |                       0           |
| INT2017spe...ed/620.omnetpp_s/620.omnetpp_s |                             719 |                             719 |                      0           |                             1210 |                             1210 |                       0           |
| INT2017speed/605.mcf_s/605.mcf_s            |                             123 |                             123 |                      0           |                              372 |                              372 |                       0           |
| INT2017spe...00.perlbench_s/600.perlbench_s |                            4375 |                            4375 |                      0           |                             9740 |                             9740 |                       0           |
| INT2017rate/557.xz_r/557.xz_r               |                             300 |                             300 |                      0           |                              603 |                              603 |                       0           |
| INT2017rate/541.leela_r/541.leela_r         |                             310 |                             310 |                      0           |                              449 |                              449 |                       0           |
| INT2017rate/525.x264_r/525.x264_r           |                            2147 |                            2147 |                      0           |                             4598 |                             4598 |                       0           |
| INT2017rat...23.xalancbmk_r/523.xalancbmk_r |                            1822 |                            1822 |                      0           |                             2969 |                             2969 |                       0           |
| INT2017rate/520.omnetpp_r/520.omnetpp_r     |                             719 |                             719 |                      0           |                             1210 |                             1210 |                       0           |
| INT2017rate/505.mcf_r/505.mcf_r             |                             123 |                             123 |                      0           |                              372 |                              372 |                       0           |
| INT2017rat...00.perlbench_r/500.perlbench_r |                            4375 |                            4375 |                      0           |                             9740 |                             9740 |                       0           |
| FP2017speed/644.nab_s/644.nab_s             |                             713 |                             713 |                      0           |                             1066 |                             1066 |                       0           |
| FP2017speed/619.lbm_s/619.lbm_s             |                              88 |                              88 |                      0           |                               90 |                               90 |                       0           |
| FP2017rate/544.nab_r/544.nab_r              |                             713 |                             713 |                      0           |                             1066 |                             1066 |                       0           |
| FP2017rate/519.lbm_r/519.lbm_r              |                              90 |                              90 |                      0           |                               92 |                               92 |                       0           |
| FP2017rate/511.povray_r/511.povray_r        |                            1571 |                            1571 |                      0           |                             3043 |                             3043 |                       0           |
| INT2017speed/657.xz_s/657.xz_s              |                             300 |                             300 |                      0           |                              603 |                              603 |                       0           |
| FP2017speed/638.imagick_s/638.imagick_s     |                            4074 |                            4054 |                     -0.00490918  |                            10335 |                            10452 |                       0.0113208   |
| FP2017rate/538.imagick_r/538.imagick_r      |                            4074 |                            4054 |                     -0.00490918  |                            10335 |                            10452 |                       0.0113208   |
| INT2017rat...31.deepsjeng_r/531.deepsjeng_r |                             344 |                             341 |                     -0.00872093  |                              690 |                              691 |                       0.00144928  |
| INT2017spe...31.deepsjeng_s/631.deepsjeng_s |                             344 |                             341 |                     -0.00872093  |                              690 |                              691 |                       0.00144928  |
| Geomean difference                          |                             nan |                             nan |                     -0.000894825 |                              nan |                              nan |                       0.000962103 |

So I guess that answers my worry that increasing the LMUL 8 weights might increase spills at smaller LMULs, i.e. it looks like it doesn't!

Thanks for being patient on this for so long. The code changes + test diff LGTM :)

https://github.com/llvm/llvm-project/pull/113675


More information about the llvm-commits mailing list