[llvm] [RISCV] Optimise spills/fills of FPR<->GPR moves (PR #78408)
Alex Bradbury via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 17 00:15:02 PST 2024
https://github.com/asb created https://github.com/llvm/llvm-project/pull/78408
If spilling the destination of an FPR<->GPR move, we can just store the source register instead. If filling the source of an FPR<->GPR move, we can just load the destination register instead. This avoids the fmv instruction because a GPR or FPR load/store can be used directly.
AArch64 and SystemZ implement a similar optimisation.
From 7751aa58dbb70ae5bc80d007f12fc82181ee34a7 Mon Sep 17 00:00:00 2001
From: Alex Bradbury <asb at igalia.com>
Date: Wed, 17 Jan 2024 06:47:59 +0000
Subject: [PATCH] [RISCV] Optimise spills/fills of FPR<->GPR moves
If spilling the destination of an FPR<->GPR move, we can just store the source
register instead. If filling the source of an FPR<->GPR move, we can just load
the destination register instead. This avoids the fmv instruction
because a GPR or FPR load/store can be used directly.
AArch64 and SystemZ implement a similar optimisation.
---
llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 37 ++++++++++++++++++++--
llvm/test/CodeGen/RISCV/spill-fill-fold.ll | 36 +++++++--------------
2 files changed, 46 insertions(+), 27 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 857e8979762cdc..1e7dd94900495c 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -763,6 +763,37 @@ MachineInstr *RISCVInstrInfo::foldMemoryOperandImpl(
VirtRegMap *VRM) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
+ if (Ops.size() != 1)
+ return nullptr;
+ unsigned Opcode = MI.getOpcode();
+
+ // If spilling the destination of a FPR<->GPR move, just store the source
+ // register instead. If filling the source of a FPR<->GPR move, just load
+ // the destination register instead.
+ if (Opcode == RISCV::FMV_D_X || Opcode == RISCV::FMV_W_X ||
+ Opcode == RISCV::FMV_X_D || Opcode == RISCV::FMV_X_W) {
+ bool IsSpill = Ops[0] == 0;
+ const MachineOperand &DstMO = MI.getOperand(0);
+ const MachineOperand &SrcMO = MI.getOperand(1);
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ MachineBasicBlock &MBB = *MI.getParent();
+ Register DstReg = DstMO.getReg();
+ Register SrcReg = SrcMO.getReg();
+
+ auto getRegClass = [&](unsigned Reg) {
+ return Register::isVirtualRegister(Reg) ? MRI.getRegClass(Reg)
+ : TRI.getMinimalPhysRegClass(Reg);
+ };
+ if (IsSpill)
+ storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
+ getRegClass(SrcReg), &TRI, Register());
+ else
+ loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex,
+ getRegClass(DstReg), &TRI, Register());
+ return &*--InsertPt;
+ }
+
// The below optimizations narrow the load so they are only valid for little
// endian.
// TODO: Support big endian by adding an offset into the frame object?
@@ -770,11 +801,11 @@ MachineInstr *RISCVInstrInfo::foldMemoryOperandImpl(
return nullptr;
// Fold load from stack followed by sext.b/sext.h/sext.w/zext.b/zext.h/zext.w.
- if (Ops.size() != 1 || Ops[0] != 1)
- return nullptr;
+ if (Ops[0] != 1)
+ return nullptr;
unsigned LoadOpc;
- switch (MI.getOpcode()) {
+ switch (Opcode) {
default:
if (RISCV::isSEXT_W(MI)) {
LoadOpc = RISCV::LW;
diff --git a/llvm/test/CodeGen/RISCV/spill-fill-fold.ll b/llvm/test/CodeGen/RISCV/spill-fill-fold.ll
index a9a0cc5cf94d85..54678eaa2b58b0 100644
--- a/llvm/test/CodeGen/RISCV/spill-fill-fold.ll
+++ b/llvm/test/CodeGen/RISCV/spill-fill-fold.ll
@@ -36,8 +36,7 @@ define float @spill_i32_to_float(i32 %a) nounwind {
; RV32ID-NEXT: fsd fs9, 24(sp) # 8-byte Folded Spill
; RV32ID-NEXT: fsd fs10, 16(sp) # 8-byte Folded Spill
; RV32ID-NEXT: fsd fs11, 8(sp) # 8-byte Folded Spill
-; RV32ID-NEXT: fmv.w.x fa5, a0
-; RV32ID-NEXT: fsw fa5, 4(sp) # 4-byte Folded Spill
+; RV32ID-NEXT: sw a0, 4(sp) # 4-byte Folded Spill
; RV32ID-NEXT: #APP
; RV32ID-NEXT: #NO_APP
; RV32ID-NEXT: flw fa0, 4(sp) # 4-byte Folded Reload
@@ -97,8 +96,7 @@ define float @spill_i32_to_float(i32 %a) nounwind {
; RV64ID-NEXT: fsd fs9, 24(sp) # 8-byte Folded Spill
; RV64ID-NEXT: fsd fs10, 16(sp) # 8-byte Folded Spill
; RV64ID-NEXT: fsd fs11, 8(sp) # 8-byte Folded Spill
-; RV64ID-NEXT: fmv.w.x fa5, a0
-; RV64ID-NEXT: fsw fa5, 4(sp) # 4-byte Folded Spill
+; RV64ID-NEXT: sd a0, 4(sp) # 4-byte Folded Spill
; RV64ID-NEXT: #APP
; RV64ID-NEXT: #NO_APP
; RV64ID-NEXT: flw fa0, 4(sp) # 4-byte Folded Reload
@@ -163,8 +161,7 @@ define i32 @spill_float_to_i32(float %a) nounwind {
; RV32ID-NEXT: fsd fs9, 24(sp) # 8-byte Folded Spill
; RV32ID-NEXT: fsd fs10, 16(sp) # 8-byte Folded Spill
; RV32ID-NEXT: fsd fs11, 8(sp) # 8-byte Folded Spill
-; RV32ID-NEXT: fmv.x.w a0, fa0
-; RV32ID-NEXT: sw a0, 4(sp) # 4-byte Folded Spill
+; RV32ID-NEXT: fsw fa0, 4(sp) # 4-byte Folded Spill
; RV32ID-NEXT: #APP
; RV32ID-NEXT: #NO_APP
; RV32ID-NEXT: lw a0, 4(sp) # 4-byte Folded Reload
@@ -224,8 +221,7 @@ define i32 @spill_float_to_i32(float %a) nounwind {
; RV64ID-NEXT: fsd fs9, 24(sp) # 8-byte Folded Spill
; RV64ID-NEXT: fsd fs10, 16(sp) # 8-byte Folded Spill
; RV64ID-NEXT: fsd fs11, 8(sp) # 8-byte Folded Spill
-; RV64ID-NEXT: fmv.x.w a0, fa0
-; RV64ID-NEXT: sd a0, 0(sp) # 8-byte Folded Spill
+; RV64ID-NEXT: fsw fa0, 0(sp) # 8-byte Folded Spill
; RV64ID-NEXT: #APP
; RV64ID-NEXT: #NO_APP
; RV64ID-NEXT: ld a0, 0(sp) # 8-byte Folded Reload
@@ -353,8 +349,7 @@ define double @spill_i64_to_double(i64 %a) nounwind {
; RV64ID-NEXT: fsd fs9, 24(sp) # 8-byte Folded Spill
; RV64ID-NEXT: fsd fs10, 16(sp) # 8-byte Folded Spill
; RV64ID-NEXT: fsd fs11, 8(sp) # 8-byte Folded Spill
-; RV64ID-NEXT: fmv.d.x fa5, a0
-; RV64ID-NEXT: fsd fa5, 0(sp) # 8-byte Folded Spill
+; RV64ID-NEXT: sd a0, 0(sp) # 8-byte Folded Spill
; RV64ID-NEXT: #APP
; RV64ID-NEXT: #NO_APP
; RV64ID-NEXT: fld fa0, 0(sp) # 8-byte Folded Reload
@@ -484,8 +479,7 @@ define i64 @spill_double_to_i64(double %a) nounwind {
; RV64ID-NEXT: fsd fs9, 24(sp) # 8-byte Folded Spill
; RV64ID-NEXT: fsd fs10, 16(sp) # 8-byte Folded Spill
; RV64ID-NEXT: fsd fs11, 8(sp) # 8-byte Folded Spill
-; RV64ID-NEXT: fmv.x.d a0, fa0
-; RV64ID-NEXT: sd a0, 0(sp) # 8-byte Folded Spill
+; RV64ID-NEXT: fsd fa0, 0(sp) # 8-byte Folded Spill
; RV64ID-NEXT: #APP
; RV64ID-NEXT: #NO_APP
; RV64ID-NEXT: ld a0, 0(sp) # 8-byte Folded Reload
@@ -553,8 +547,7 @@ define float @fill_i32_to_float(i32 %a) nounwind {
; RV32ID-NEXT: sw a0, 4(sp) # 4-byte Folded Spill
; RV32ID-NEXT: #APP
; RV32ID-NEXT: #NO_APP
-; RV32ID-NEXT: lw a0, 4(sp) # 4-byte Folded Reload
-; RV32ID-NEXT: fmv.w.x fa0, a0
+; RV32ID-NEXT: flw fa0, 4(sp) # 4-byte Folded Reload
; RV32ID-NEXT: lw ra, 156(sp) # 4-byte Folded Reload
; RV32ID-NEXT: lw s0, 152(sp) # 4-byte Folded Reload
; RV32ID-NEXT: lw s1, 148(sp) # 4-byte Folded Reload
@@ -614,8 +607,7 @@ define float @fill_i32_to_float(i32 %a) nounwind {
; RV64ID-NEXT: sd a0, 0(sp) # 8-byte Folded Spill
; RV64ID-NEXT: #APP
; RV64ID-NEXT: #NO_APP
-; RV64ID-NEXT: ld a0, 0(sp) # 8-byte Folded Reload
-; RV64ID-NEXT: fmv.w.x fa0, a0
+; RV64ID-NEXT: flw fa0, 0(sp) # 8-byte Folded Reload
; RV64ID-NEXT: ld ra, 200(sp) # 8-byte Folded Reload
; RV64ID-NEXT: ld s0, 192(sp) # 8-byte Folded Reload
; RV64ID-NEXT: ld s1, 184(sp) # 8-byte Folded Reload
@@ -680,8 +672,7 @@ define i32 @fill_float_to_i32(float %a) nounwind {
; RV32ID-NEXT: fsw fa0, 4(sp) # 4-byte Folded Spill
; RV32ID-NEXT: #APP
; RV32ID-NEXT: #NO_APP
-; RV32ID-NEXT: flw fa5, 4(sp) # 4-byte Folded Reload
-; RV32ID-NEXT: fmv.x.w a0, fa5
+; RV32ID-NEXT: lw a0, 4(sp) # 4-byte Folded Reload
; RV32ID-NEXT: lw ra, 156(sp) # 4-byte Folded Reload
; RV32ID-NEXT: lw s0, 152(sp) # 4-byte Folded Reload
; RV32ID-NEXT: lw s1, 148(sp) # 4-byte Folded Reload
@@ -741,8 +732,7 @@ define i32 @fill_float_to_i32(float %a) nounwind {
; RV64ID-NEXT: fsw fa0, 4(sp) # 4-byte Folded Spill
; RV64ID-NEXT: #APP
; RV64ID-NEXT: #NO_APP
-; RV64ID-NEXT: flw fa5, 4(sp) # 4-byte Folded Reload
-; RV64ID-NEXT: fmv.x.w a0, fa5
+; RV64ID-NEXT: ld a0, 4(sp) # 4-byte Folded Reload
; RV64ID-NEXT: ld ra, 200(sp) # 8-byte Folded Reload
; RV64ID-NEXT: ld s0, 192(sp) # 8-byte Folded Reload
; RV64ID-NEXT: ld s1, 184(sp) # 8-byte Folded Reload
@@ -870,8 +860,7 @@ define double @fill_i64_to_double(i64 %a) nounwind {
; RV64ID-NEXT: sd a0, 0(sp) # 8-byte Folded Spill
; RV64ID-NEXT: #APP
; RV64ID-NEXT: #NO_APP
-; RV64ID-NEXT: ld a0, 0(sp) # 8-byte Folded Reload
-; RV64ID-NEXT: fmv.d.x fa0, a0
+; RV64ID-NEXT: fld fa0, 0(sp) # 8-byte Folded Reload
; RV64ID-NEXT: ld ra, 200(sp) # 8-byte Folded Reload
; RV64ID-NEXT: ld s0, 192(sp) # 8-byte Folded Reload
; RV64ID-NEXT: ld s1, 184(sp) # 8-byte Folded Reload
@@ -999,8 +988,7 @@ define i64 @fill_double_to_i64(double %a) nounwind {
; RV64ID-NEXT: fsd fa0, 0(sp) # 8-byte Folded Spill
; RV64ID-NEXT: #APP
; RV64ID-NEXT: #NO_APP
-; RV64ID-NEXT: fld fa5, 0(sp) # 8-byte Folded Reload
-; RV64ID-NEXT: fmv.x.d a0, fa5
+; RV64ID-NEXT: ld a0, 0(sp) # 8-byte Folded Reload
; RV64ID-NEXT: ld ra, 200(sp) # 8-byte Folded Reload
; RV64ID-NEXT: ld s0, 192(sp) # 8-byte Folded Reload
; RV64ID-NEXT: ld s1, 184(sp) # 8-byte Folded Reload
More information about the llvm-commits
mailing list