[llvm] 00fabd2 - [RISCV][RegAlloc] Add getCSRFirstUseCost for RISC-V (#131349)

via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 20 12:20:07 PDT 2025


Author: Michael Maitland
Date: 2025-03-20T15:20:04-04:00
New Revision: 00fabd21bce85259a74a64a435874d290ed2da38

URL: https://github.com/llvm/llvm-project/commit/00fabd21bce85259a74a64a435874d290ed2da38
DIFF: https://github.com/llvm/llvm-project/commit/00fabd21bce85259a74a64a435874d290ed2da38.diff

LOG: [RISCV][RegAlloc] Add getCSRFirstUseCost for RISC-V (#131349)

This is based off of 63efd8e7e68bc.

The following table shows the percent change in the dynamic instruction
count when the function in this patch returns each of the other values
listed, relative to returning 0 (the default).

| benchmark | % speedup 1 over 0 | % speedup 4 over 0 | % speedup 16 over 0 | % speedup 64 over 0 | % speedup 128 over 0 |
| --------------- | ---------------------- | --------------------- | --------------------- | -------------------- | -------------------- |
| 500.perlbench_r | 0.001018570165 | 0.001049508358 | 0.001001106529 | 0.03382582818 | 0.03395354577 |
| 502.gcc_r | 0.02850551412 | 0.02170512371 | 0.01453021263 | 0.06011008637 | 0.1215691521 |
| 505.mcf_r | -0.00009506373338 | -0.00009090057642 | -0.0000860991497 | -0.00005027849766 | 0.00001251173791 |
| 520.omnetpp_r | 0.2958940288 | 0.2959715925 | 0.2961141505 | 0.2959823497 | 0.2963124341 |
| 523.xalancbmk_r | -0.0327074721 | -0.01037021046 | -0.3226810542 | 0.02127133714 | 0.02765388389 |
| 525.x264_r | 0.0000001381714403 | -0.00000007041540345 | -0.00000002156399465 | 0.0000002108993364 | 0.0000002463382874 |
| 531.deepsjeng_r | 0.00000000339777238 | 0.000000003874652714 | 0.000000003636212547 | 0.000000003874652714 | 0.000000003159332213 |
| 541.leela_r | 0.0009186059953 | -0.000424159199 | 0.0004984456879 | 0.274948447 | 0.8135521414 |
| 557.xz_r | -0.000000003547118854 | -0.00004896449559 | -0.00004910691576 | -0.0000491109983 | -0.00004895599589 |
| geomean | 0.03265937388 | 0.03424232324 | -0.00107917442 | 0.07629116165 | 0.1439913192 |

The following table shows the percent change in the runtime when the
function in this patch returns each of the other values listed, relative
to returning 0 (the default).

| benchmark | % speedup 1 over 0 | % speedup 4 over 0 | % speedup 16 over 0 | % speedup 64 over 0 | % speedup 128 over 0 |
| --------------- | ------------------ | ------------------ | ------------------- | ------------------- | ------------------- |
| 500.perlbench_r | 0.1722356761 | 0.2269681109 | 0.2596825578 | 0.361573851 | 1.15041305 |
| 502.gcc_r | -0.548415855 | -0.06187002799 | -0.5553684674 | -0.8876686237 | -0.4668665535 |
| 505.mcf_r | -0.8786414258 | -0.4150938441 | -1.035517726 | -0.1860770377 | -0.01904825648 |
| 520.omnetpp_r | 0.4130256072 | 0.6595976188 | 0.897332171 | 0.6252625622 | 0.3869467278 |
| 523.xalancbmk_r | 1.318132014 | -0.003927574 | 1.025962975 | 1.090320253 | -0.789206202 |
| 525.x264_r | -0.03112871796 | -0.00167557587 | 0.06932423155 | -0.1919840015 | -0.1203585732 |
| 531.deepsjeng_r | -0.259516072 | -0.01973455652 | -0.2723227894 | -0.005417022257 | -0.02222388177 |
| 541.leela_r | -0.3497178495 | -0.3510447393 | 0.1274508001 | 0.6485542452 | 0.2880651727 |
| 557.xz_r | 0.7683565263 | -0.2197509447 | -0.0431183874 | 0.07518130872 | 0.5236853039 |
| geomean | 0.06506952742 | -0.0211865386 | 0.05072694648 | 0.1684530637 | 0.1020533557 |

I chose to set the value to 5 on RISC-V because it improves both the
dynamic instruction count and the runtime, because it showed good results
empirically, and because it had a similar effect to setting it to higher
numbers.

I looked at some diffs, and it seems like this patch leads to two things:
1. Less spilling -- not spilling the CSR led to better register
allocation and helped us avoid spills down the line.
2. Avoiding the CSR spill, at the cost of spilling more on paths that
static heuristics estimate as cold.

Added: 
    llvm/test/CodeGen/RISCV/csr-first-use-cost.ll

Modified: 
    llvm/lib/CodeGen/RegAllocGreedy.cpp
    llvm/lib/Target/RISCV/RISCVRegisterInfo.h

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp
index 9c0216dba2a94..ac1e9fe1ca589 100644
--- a/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -2375,10 +2375,12 @@ void RAGreedy::aboutToRemoveInterval(const LiveInterval &LI) {
 }
 
 void RAGreedy::initializeCSRCost() {
-  // We use the larger one out of the command-line option and the value report
-  // by TRI.
+  // We use the command-line option if it is explicitly set, otherwise use the
+  // larger one out of the command-line option and the value reported by TRI.
   CSRCost = BlockFrequency(
-      std::max((unsigned)CSRFirstTimeCost, TRI->getCSRFirstUseCost()));
+      CSRFirstTimeCost.getNumOccurrences()
+          ? CSRFirstTimeCost
+          : std::max((unsigned)CSRFirstTimeCost, TRI->getCSRFirstUseCost()));
   if (!CSRCost.getFrequency())
     return;
 

diff  --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h
index 5260198878451..b368399e2ad14 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h
@@ -61,6 +61,13 @@ struct RISCVRegisterInfo : public RISCVGenRegisterInfo {
   const uint32_t *getCallPreservedMask(const MachineFunction &MF,
                                        CallingConv::ID) const override;
 
+  unsigned getCSRFirstUseCost() const override {
+    // The cost will be compared against BlockFrequency where entry has the
+    // value of 1 << 14. A value of 5 will choose to spill or split cold
+    // path instead of using a callee-saved register.
+    return 5;
+  }
+
   const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
 
   const MCPhysReg *getIPRACSRegs(const MachineFunction *MF) const override;

diff  --git a/llvm/test/CodeGen/RISCV/csr-first-use-cost.ll b/llvm/test/CodeGen/RISCV/csr-first-use-cost.ll
new file mode 100644
index 0000000000000..c6e5bae3c3c24
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/csr-first-use-cost.ll
@@ -0,0 +1,107 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=riscv64 -regalloc-csr-first-time-cost=0 < %s | FileCheck %s -check-prefix=ZERO-COST
+; RUN: llc -mtriple=riscv64 < %s | FileCheck %s -check-prefix=DEFAULT-COST
+
+define fastcc void @Perl_sv_setnv(i8 %c, ptr %.str.54.3682) nounwind {
+; ZERO-COST-LABEL: Perl_sv_setnv:
+; ZERO-COST:       # %bb.0: # %entry
+; ZERO-COST-NEXT:    addi sp, sp, -32
+; ZERO-COST-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; ZERO-COST-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; ZERO-COST-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
+; ZERO-COST-NEXT:    andi a0, a0, 255
+; ZERO-COST-NEXT:    li a2, 2
+; ZERO-COST-NEXT:    blt a2, a0, .LBB0_3
+; ZERO-COST-NEXT:  # %bb.1: # %entry
+; ZERO-COST-NEXT:    beqz a0, .LBB0_4
+; ZERO-COST-NEXT:  # %bb.2: # %entry
+; ZERO-COST-NEXT:    mv s0, a1
+; ZERO-COST-NEXT:    li a1, 1
+; ZERO-COST-NEXT:    beq a0, a1, .LBB0_6
+; ZERO-COST-NEXT:    j .LBB0_7
+; ZERO-COST-NEXT:  .LBB0_3: # %entry
+; ZERO-COST-NEXT:    li a2, 3
+; ZERO-COST-NEXT:    bne a0, a2, .LBB0_5
+; ZERO-COST-NEXT:  .LBB0_4: # %sw.bb3
+; ZERO-COST-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; ZERO-COST-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; ZERO-COST-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; ZERO-COST-NEXT:    addi sp, sp, 32
+; ZERO-COST-NEXT:    ret
+; ZERO-COST-NEXT:  .LBB0_5: # %entry
+; ZERO-COST-NEXT:    mv s0, a1
+; ZERO-COST-NEXT:    li a1, 12
+; ZERO-COST-NEXT:    bne a0, a1, .LBB0_7
+; ZERO-COST-NEXT:  .LBB0_6: # %sw.bb34.i
+; ZERO-COST-NEXT:    li s0, 0
+; ZERO-COST-NEXT:  .LBB0_7: # %Perl_sv_reftype.exit
+; ZERO-COST-NEXT:    li s1, 0
+; ZERO-COST-NEXT:    li a0, 0
+; ZERO-COST-NEXT:    li a1, 0
+; ZERO-COST-NEXT:    jalr s1
+; ZERO-COST-NEXT:    li a0, 0
+; ZERO-COST-NEXT:    mv a1, s0
+; ZERO-COST-NEXT:    li a2, 0
+; ZERO-COST-NEXT:    jalr s1
+;
+; DEFAULT-COST-LABEL: Perl_sv_setnv:
+; DEFAULT-COST:       # %bb.0: # %entry
+; DEFAULT-COST-NEXT:    addi sp, sp, -32
+; DEFAULT-COST-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; DEFAULT-COST-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; DEFAULT-COST-NEXT:    andi a0, a0, 255
+; DEFAULT-COST-NEXT:    li a2, 2
+; DEFAULT-COST-NEXT:    blt a2, a0, .LBB0_3
+; DEFAULT-COST-NEXT:  # %bb.1: # %entry
+; DEFAULT-COST-NEXT:    beqz a0, .LBB0_4
+; DEFAULT-COST-NEXT:  # %bb.2: # %entry
+; DEFAULT-COST-NEXT:    sd a1, 8(sp) # 8-byte Folded Spill
+; DEFAULT-COST-NEXT:    li a1, 1
+; DEFAULT-COST-NEXT:    beq a0, a1, .LBB0_6
+; DEFAULT-COST-NEXT:    j .LBB0_7
+; DEFAULT-COST-NEXT:  .LBB0_3: # %entry
+; DEFAULT-COST-NEXT:    li a2, 3
+; DEFAULT-COST-NEXT:    bne a0, a2, .LBB0_5
+; DEFAULT-COST-NEXT:  .LBB0_4: # %sw.bb3
+; DEFAULT-COST-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; DEFAULT-COST-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; DEFAULT-COST-NEXT:    addi sp, sp, 32
+; DEFAULT-COST-NEXT:    ret
+; DEFAULT-COST-NEXT:  .LBB0_5: # %entry
+; DEFAULT-COST-NEXT:    sd a1, 8(sp) # 8-byte Folded Spill
+; DEFAULT-COST-NEXT:    li a1, 12
+; DEFAULT-COST-NEXT:    bne a0, a1, .LBB0_7
+; DEFAULT-COST-NEXT:  .LBB0_6: # %sw.bb34.i
+; DEFAULT-COST-NEXT:    sd zero, 8(sp) # 8-byte Folded Spill
+; DEFAULT-COST-NEXT:  .LBB0_7: # %Perl_sv_reftype.exit
+; DEFAULT-COST-NEXT:    li s0, 0
+; DEFAULT-COST-NEXT:    li a0, 0
+; DEFAULT-COST-NEXT:    li a1, 0
+; DEFAULT-COST-NEXT:    jalr s0
+; DEFAULT-COST-NEXT:    li a0, 0
+; DEFAULT-COST-NEXT:    ld a1, 8(sp) # 8-byte Folded Reload
+; DEFAULT-COST-NEXT:    li a2, 0
+; DEFAULT-COST-NEXT:    jalr s0
+entry:
+  switch i8 %c, label %Perl_sv_reftype.exit [
+    i8 1, label %sw.bb4
+    i8 12, label %sw.bb34.i
+    i8 3, label %sw.bb3
+    i8 0, label %sw.bb3
+  ]
+
+sw.bb3:                                           ; preds = %entry, %entry
+  ret void
+
+sw.bb4:                                           ; preds = %entry
+  br label %Perl_sv_reftype.exit
+
+sw.bb34.i:                                        ; preds = %entry
+  br label %Perl_sv_reftype.exit
+
+Perl_sv_reftype.exit:                             ; preds = %sw.bb34.i, %sw.bb4, %entry
+  %retval.0.i = phi ptr [ null, %sw.bb34.i ], [ null, %sw.bb4 ], [ %.str.54.3682, %entry ]
+  %call17 = tail call fastcc i64 null(ptr null, i32 0)
+  tail call void (ptr, ...) null(ptr null, ptr %retval.0.i, ptr null)
+  unreachable
+}


        


More information about the llvm-commits mailing list