[llvm] [RISCV][RegAlloc] Add getCSRFirstUseCost for RISC-V (PR #131349)

via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 14 09:11:23 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-risc-v

Author: Michael Maitland (michaelmaitland)

<details>
<summary>Changes</summary>

This is based on commit 63efd8e7e68bc.

The following table shows the percent change to the dynamic instruction count when the function in this patch returns 0 (default) versus other values.

| benchmark       | % speedup 1 over 0     | % speedup 4 over 0    | % speedup 16 over 0   | % speedup 64 over 0  | % speedup 128 over 0 |
| --------------- | ---------------------- | --------------------- | --------------------- | -------------------- | -------------------- |
| 500.perlbench_r | 0.001018570165         | 0.001049508358        | 0.001001106529        | 0.03382582818        | 0.03395354577        |
| 502.gcc_r       | 0.02850551412          | 0.02170512371         | 0.01453021263         | 0.06011008637        | 0.1215691521         |
| 505.mcf_r       | -0.00009506373338      | -0.00009090057642     | -0.0000860991497      | -0.00005027849766    | 0.00001251173791     |
| 520.omnetpp_r   | 0.2958940288           | 0.2959715925          | 0.2961141505          | 0.2959823497         | 0.2963124341         |
| 523.xalancbmk_r | -0.0327074721          | -0.01037021046        | -0.3226810542         | 0.02127133714        | 0.02765388389        |
| 525.x264_r      | 0.0000001381714403     | -0.00000007041540345  | -0.00000002156399465  | 0.0000002108993364   | 0.0000002463382874   |
| 531.deepsjeng_r | 0.00000000339777238    | 0.000000003874652714  | 0.000000003636212547  | 0.000000003874652714 | 0.000000003159332213 |
| 541.leela_r     | 0.0009186059953        | -0.000424159199       | 0.0004984456879       | 0.274948447          | 0.8135521414         |
| 557.xz_r        | -0.000000003547118854  | -0.00004896449559     | -0.00004910691576     | -0.0000491109983     | -0.00004895599589    |
| geomean         | 0.03265937388          | 0.03424232324         | -0.00107917442        | 0.07629116165        | 0.1439913192         |

The following table shows the percent change to the runtime when the function in this patch returns 0 (default) versus other values.

| benchmark       | % speedup 1 over 0 | % speedup 4 over 0 | % speedup 16 over 0 | % speedup 64 over 0 | % speedup 128 over 0 |
| --------------- | ------------------ | ------------------ | ------------------- | ------------------- | ------------------- |
| 500.perlbench_r | 0.1722356761       | 0.2269681109       | 0.2596825578        | 0.361573851         | 1.15041305          |
| 502.gcc_r       | -0.548415855       | -0.06187002799     | -0.5553684674       | -0.8876686237       | -0.4668665535       |
| 505.mcf_r       | -0.8786414258      | -0.4150938441      | -1.035517726        | -0.1860770377       | -0.01904825648      |
| 520.omnetpp_r   | 0.4130256072       | 0.6595976188       | 0.897332171         | 0.6252625622        | 0.3869467278        |
| 523.xalancbmk_r | 1.318132014        | -0.003927574       | 1.025962975         | 1.090320253         | -0.789206202        |
| 525.x264_r      | -0.03112871796     | -0.00167557587     | 0.06932423155       | -0.1919840015       | -0.1203585732       |
| 531.deepsjeng_r | -0.259516072       | -0.01973455652     | -0.2723227894       | -0.005417022257     | -0.02222388177      |
| 541.leela_r     | -0.3497178495      | -0.3510447393      | 0.1274508001        | 0.6485542452        | 0.2880651727        |
| 557.xz_r        | 0.7683565263       | -0.2197509447      | -0.0431183874       | 0.07518130872       | 0.5236853039        |
| geomean         | 0.06506952742      | -0.0211865386      | 0.05072694648       | 0.1684530637        | 0.1020533557        |

I chose to set the value to 64 on RISC-V because it improves both the dynamic instruction count and the runtime, and because AMDGPU sets its value to 100 and callee-saved spills are probably less expensive on RISC-V than on AMDGPU.

I looked at some of the resulting diffs, and it seems that this patch leads to two things:
1. Less spilling -- not spilling the CSR led to better register allocation and helped us avoid spills down the line
2. Avoiding the CSR spill, at the cost of spilling more on paths that the static heuristics estimate as cold.

---
Full diff: https://github.com/llvm/llvm-project/pull/131349.diff


2 Files Affected:

- (modified) llvm/lib/Target/RISCV/RISCVRegisterInfo.h (+7) 
- (added) llvm/test/CodeGen/RISCV/csr-first-use-cost.ll (+105) 


``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h
index 0830191dde3f4..fc5852e081a22 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h
@@ -61,6 +61,13 @@ struct RISCVRegisterInfo : public RISCVGenRegisterInfo {
   const uint32_t *getCallPreservedMask(const MachineFunction &MF,
                                        CallingConv::ID) const override;
 
+  unsigned getCSRFirstUseCost() const override {
+    // The cost will be compared against BlockFrequency where entry has the
+    // value of 1 << 14. A value of 64 will choose to spill or split cold
+    // path instead of using a callee-saved register.
+    return 64;
+  }
+
   const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
 
   const MCPhysReg *getIPRACSRegs(const MachineFunction *MF) const override;
diff --git a/llvm/test/CodeGen/RISCV/csr-first-use-cost.ll b/llvm/test/CodeGen/RISCV/csr-first-use-cost.ll
new file mode 100644
index 0000000000000..62ad116256050
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/csr-first-use-cost.ll
@@ -0,0 +1,105 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc %s -mtriple=riscv64 -regalloc-csr-first-time-cost=0 | FileCheck %s -check-prefix=ZERO-COST
+; RUN: llc %s -mtriple=riscv64 -regalloc-csr-first-time-cost=64 | FileCheck %s -check-prefix=SOME-COST
+
+define fastcc void @Perl_sv_setnv(ptr %.str.54.3682) {
+; ZERO-COST-LABEL: Perl_sv_setnv:
+; ZERO-COST:       # %bb.0: # %entry
+; ZERO-COST-NEXT:    addi sp, sp, -32
+; ZERO-COST-NEXT:    .cfi_def_cfa_offset 32
+; ZERO-COST-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; ZERO-COST-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; ZERO-COST-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
+; ZERO-COST-NEXT:    .cfi_offset ra, -8
+; ZERO-COST-NEXT:    .cfi_offset s0, -16
+; ZERO-COST-NEXT:    .cfi_offset s1, -24
+; ZERO-COST-NEXT:    bnez zero, .LBB0_5
+; ZERO-COST-NEXT:  # %bb.1: # %entry
+; ZERO-COST-NEXT:    li a1, 1
+; ZERO-COST-NEXT:    bnez a1, .LBB0_6
+; ZERO-COST-NEXT:  .LBB0_2: # %entry
+; ZERO-COST-NEXT:    mv s0, a0
+; ZERO-COST-NEXT:    beqz zero, .LBB0_4
+; ZERO-COST-NEXT:  # %bb.3: # %sw.bb34.i
+; ZERO-COST-NEXT:    li s0, 0
+; ZERO-COST-NEXT:  .LBB0_4: # %Perl_sv_reftype.exit
+; ZERO-COST-NEXT:    li s1, 0
+; ZERO-COST-NEXT:    li a0, 0
+; ZERO-COST-NEXT:    li a1, 0
+; ZERO-COST-NEXT:    jalr s1
+; ZERO-COST-NEXT:    li a0, 0
+; ZERO-COST-NEXT:    mv a1, s0
+; ZERO-COST-NEXT:    li a2, 0
+; ZERO-COST-NEXT:    jalr s1
+; ZERO-COST-NEXT:  .LBB0_5: # %entry
+; ZERO-COST-NEXT:    beqz zero, .LBB0_2
+; ZERO-COST-NEXT:  .LBB0_6: # %sw.bb3
+; ZERO-COST-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; ZERO-COST-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; ZERO-COST-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; ZERO-COST-NEXT:    .cfi_restore ra
+; ZERO-COST-NEXT:    .cfi_restore s0
+; ZERO-COST-NEXT:    .cfi_restore s1
+; ZERO-COST-NEXT:    addi sp, sp, 32
+; ZERO-COST-NEXT:    .cfi_def_cfa_offset 0
+; ZERO-COST-NEXT:    ret
+;
+; SOME-COST-LABEL: Perl_sv_setnv:
+; SOME-COST:       # %bb.0: # %entry
+; SOME-COST-NEXT:    addi sp, sp, -32
+; SOME-COST-NEXT:    .cfi_def_cfa_offset 32
+; SOME-COST-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; SOME-COST-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; SOME-COST-NEXT:    .cfi_offset ra, -8
+; SOME-COST-NEXT:    .cfi_offset s0, -16
+; SOME-COST-NEXT:    bnez zero, .LBB0_5
+; SOME-COST-NEXT:  # %bb.1: # %entry
+; SOME-COST-NEXT:    li a1, 1
+; SOME-COST-NEXT:    bnez a1, .LBB0_6
+; SOME-COST-NEXT:  .LBB0_2: # %entry
+; SOME-COST-NEXT:    sd a0, 8(sp) # 8-byte Folded Spill
+; SOME-COST-NEXT:    beqz zero, .LBB0_4
+; SOME-COST-NEXT:  # %bb.3: # %sw.bb34.i
+; SOME-COST-NEXT:    sd zero, 8(sp) # 8-byte Folded Spill
+; SOME-COST-NEXT:  .LBB0_4: # %Perl_sv_reftype.exit
+; SOME-COST-NEXT:    li s0, 0
+; SOME-COST-NEXT:    li a0, 0
+; SOME-COST-NEXT:    li a1, 0
+; SOME-COST-NEXT:    jalr s0
+; SOME-COST-NEXT:    li a0, 0
+; SOME-COST-NEXT:    ld a1, 8(sp) # 8-byte Folded Reload
+; SOME-COST-NEXT:    li a2, 0
+; SOME-COST-NEXT:    jalr s0
+; SOME-COST-NEXT:  .LBB0_5: # %entry
+; SOME-COST-NEXT:    beqz zero, .LBB0_2
+; SOME-COST-NEXT:  .LBB0_6: # %sw.bb3
+; SOME-COST-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; SOME-COST-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; SOME-COST-NEXT:    .cfi_restore ra
+; SOME-COST-NEXT:    .cfi_restore s0
+; SOME-COST-NEXT:    addi sp, sp, 32
+; SOME-COST-NEXT:    .cfi_def_cfa_offset 0
+; SOME-COST-NEXT:    ret
+entry:
+  switch i8 0, label %Perl_sv_reftype.exit [
+    i8 1, label %sw.bb4
+    i8 12, label %sw.bb34.i
+    i8 3, label %sw.bb3
+    i8 0, label %sw.bb3
+  ]
+
+sw.bb3:                                           ; preds = %entry, %entry
+  ret void
+
+sw.bb4:                                           ; preds = %entry
+  br label %Perl_sv_reftype.exit
+
+sw.bb34.i:                                        ; preds = %entry
+  br label %Perl_sv_reftype.exit
+
+Perl_sv_reftype.exit:                             ; preds = %sw.bb34.i, %sw.bb4, %entry
+  %retval.0.i = phi ptr [ null, %sw.bb34.i ], [ null, %sw.bb4 ], [ %.str.54.3682, %entry ]
+  %call17 = tail call fastcc i64 null(ptr null, i32 0)
+  tail call void (ptr, ...) null(ptr null, ptr %retval.0.i, ptr null)
+  unreachable
+}

``````````

</details>


https://github.com/llvm/llvm-project/pull/131349


More information about the llvm-commits mailing list