[llvm] 7d5c8cb - [LoongArch] Added spill/reload/copy support for CFRs
via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 10 04:21:33 PST 2022
Author: wanglei
Date: 2022-11-10T20:12:18+08:00
New Revision: 7d5c8cb023515efd1d80a9120f27ed5cdb1d1404
URL: https://github.com/llvm/llvm-project/commit/7d5c8cb023515efd1d80a9120f27ed5cdb1d1404
DIFF: https://github.com/llvm/llvm-project/commit/7d5c8cb023515efd1d80a9120f27ed5cdb1d1404.diff
LOG: [LoongArch] Added spill/reload/copy support for CFRs
1, spill/reload
When a function call is made immediately after a floating point
comparison, the result of the comparison needs to be spilled before
function call and reloaded after the function returns.
2, copy
Support `GPR` to `CFR` and `CFR` to `GPR` copies. Therefore, the correct
register class can be used in the pattern template, and the hard-coding
of mutual copying of `CFR` and `GPR` is eliminated, reducing redundant
comparison instructions.
Note: Since the `COPY` instruction between CFRs is not provided in
LoongArch, we only use `$fcc0` in the register allocation.
Reviewed By: SixWeining
Differential Revision: https://reviews.llvm.org/D137004
Added:
llvm/test/CodeGen/LoongArch/spill-reload-cfr.ll
Modified:
llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp
llvm/test/CodeGen/LoongArch/ir-instruction/fcmp-dbl.ll
llvm/test/CodeGen/LoongArch/ir-instruction/fcmp-flt.ll
llvm/test/CodeGen/LoongArch/ir-instruction/float-convert.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
index 3767fc1d793ba..40e7665fb1f7e 100644
--- a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
@@ -121,6 +121,13 @@ def FLDLE_S : FP_LOAD_3R<0b00111000011101010, "fldle.s", FPR32>;
def FSTGT_S : FP_STORE_3R<0b00111000011101100, "fstgt.s", FPR32>;
def FSTLE_S : FP_STORE_3R<0b00111000011101110, "fstle.s", FPR32>;
+// Pseudo instructions for spill/reload CFRs.
+let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in
+def PseudoST_CFR : Pseudo<(outs),
+ (ins CFR:$ccd, GPR:$rj, grlenimm:$imm)>;
+let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in
+def PseudoLD_CFR : Pseudo<(outs CFR:$ccd),
+ (ins GPR:$rj, grlenimm:$imm)>;
} // Predicates = [HasBasicF]
//===----------------------------------------------------------------------===//
@@ -159,7 +166,7 @@ def : Pat<(fcanonicalize FPR32:$fj), (FMAX_S $fj, $fj)>;
class PatFPSetcc<CondCode cc, LAInst CmpInst, RegisterClass RegTy>
: Pat<(any_fsetcc RegTy:$fj, RegTy:$fk, cc),
- (MOVCF2GR (CmpInst RegTy:$fj, RegTy:$fk))>;
+ (CmpInst RegTy:$fj, RegTy:$fk)>;
// SETOGT/SETOGE/SETUGT/SETUGE/SETGE/SETNE/SETGT will expand into
// SETOLT/SETOLE/SETULT/SETULE/SETLE/SETEQ/SETLT.
def : PatFPSetcc<SETOEQ, FCMP_CEQ_S, FPR32>;
@@ -200,7 +207,7 @@ defm : PatFPBrcond<SETLT, FCMP_CLT_S, FPR32>;
class PatStrictFsetccs<CondCode cc, LAInst CmpInst, RegisterClass RegTy>
: Pat<(strict_fsetccs RegTy:$fj, RegTy:$fk, cc),
- (MOVCF2GR (CmpInst RegTy:$fj, RegTy:$fk))>;
+ (CmpInst RegTy:$fj, RegTy:$fk)>;
def : PatStrictFsetccs<SETOEQ, FCMP_SEQ_S, FPR32>;
def : PatStrictFsetccs<SETOLT, FCMP_SLT_S, FPR32>;
def : PatStrictFsetccs<SETOLE, FCMP_SLE_S, FPR32>;
@@ -215,8 +222,8 @@ def : PatStrictFsetccs<SETLT, FCMP_SLT_S, FPR32>;
/// Select
-def : Pat<(select GPR:$cc, FPR32:$fk, FPR32:$fj),
- (FSEL_S FPR32:$fj, FPR32:$fk, (MOVGR2CF GPR:$cc))>;
+def : Pat<(select CFR:$cc, FPR32:$fk, FPR32:$fj),
+ (FSEL_S FPR32:$fj, FPR32:$fk, CFR:$cc)>;
/// Selectcc
diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
index 44a80054f0a90..50d7e9920ea99 100644
--- a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
@@ -212,8 +212,8 @@ def : PatStrictFsetccs<SETLT, FCMP_SLT_D, FPR64>;
/// Select
-def : Pat<(select GPR:$cc, FPR64:$fk, FPR64:$fj),
- (FSEL_D FPR64:$fj, FPR64:$fk, (MOVGR2CF GPR:$cc))>;
+def : Pat<(select CFR:$cc, FPR64:$fk, FPR64:$fj),
+ (FSEL_D FPR64:$fj, FPR64:$fk, CFR:$cc)>;
/// Selectcc
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
index cc0b279aef647..00b806a8909d8 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
@@ -13,6 +13,8 @@
#include "LoongArchInstrInfo.h"
#include "LoongArch.h"
#include "LoongArchMachineFunctionInfo.h"
+#include "LoongArchRegisterInfo.h"
+#include "MCTargetDesc/LoongArchMCTargetDesc.h"
#include "MCTargetDesc/LoongArchMatInt.h"
#include "llvm/CodeGen/RegisterScavenging.h"
@@ -37,6 +39,21 @@ void LoongArchInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
return;
}
+ // GPR->CFR copy.
+ if (LoongArch::CFRRegClass.contains(DstReg) &&
+ LoongArch::GPRRegClass.contains(SrcReg)) {
+ BuildMI(MBB, MBBI, DL, get(LoongArch::MOVGR2CF), DstReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
+ // CFR->GPR copy.
+ if (LoongArch::GPRRegClass.contains(DstReg) &&
+ LoongArch::CFRRegClass.contains(SrcReg)) {
+ BuildMI(MBB, MBBI, DL, get(LoongArch::MOVCF2GR), DstReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
+
// FPR->FPR copies.
unsigned Opc;
if (LoongArch::FPR32RegClass.contains(DstReg, SrcReg)) {
@@ -71,6 +88,8 @@ void LoongArchInstrInfo::storeRegToStackSlot(
Opcode = LoongArch::FST_S;
else if (LoongArch::FPR64RegClass.hasSubClassEq(RC))
Opcode = LoongArch::FST_D;
+ else if (LoongArch::CFRRegClass.hasSubClassEq(RC))
+ Opcode = LoongArch::PseudoST_CFR;
else
llvm_unreachable("Can't store this register to stack slot");
@@ -104,6 +123,8 @@ void LoongArchInstrInfo::loadRegFromStackSlot(
Opcode = LoongArch::FLD_S;
else if (LoongArch::FPR64RegClass.hasSubClassEq(RC))
Opcode = LoongArch::FLD_D;
+ else if (LoongArch::CFRRegClass.hasSubClassEq(RC))
+ Opcode = LoongArch::PseudoLD_CFR;
else
llvm_unreachable("Can't load this register from stack slot");
diff --git a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp
index 822a188ae0799..8655c5bdf7816 100644
--- a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp
@@ -94,6 +94,13 @@ LoongArchRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
if (TFI->hasBP(MF))
markSuperRegs(Reserved, LoongArchABI::getBPReg()); // bp
+ // FIXME: To avoid generating COPY instructions between CFRs, only use $fcc0.
+ // This is required to work around the fact that COPY instruction between CFRs
+ // is not provided in LoongArch.
+ if (MF.getSubtarget<LoongArchSubtarget>().hasBasicF())
+ for (size_t Reg = LoongArch::FCC1; Reg <= LoongArch::FCC7; ++Reg)
+ markSuperRegs(Reserved, Reg);
+
assert(checkAllSuperRegsMarked(Reserved));
return Reserved;
}
@@ -124,6 +131,8 @@ void LoongArchRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
const LoongArchInstrInfo *TII = STI.getInstrInfo();
const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
DebugLoc DL = MI.getDebugLoc();
+ bool IsLA64 = STI.is64Bit();
+ unsigned MIOpc = MI.getOpcode();
int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
Register FrameReg;
@@ -134,14 +143,14 @@ void LoongArchRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
bool FrameRegIsKill = false;
if (!isInt<12>(Offset.getFixed())) {
- unsigned Addi = STI.is64Bit() ? LoongArch::ADDI_D : LoongArch::ADDI_W;
- unsigned Add = STI.is64Bit() ? LoongArch::ADD_D : LoongArch::ADD_W;
+ unsigned Addi = IsLA64 ? LoongArch::ADDI_D : LoongArch::ADDI_W;
+ unsigned Add = IsLA64 ? LoongArch::ADD_D : LoongArch::ADD_W;
// The offset won't fit in an immediate, so use a scratch register instead.
// Modify Offset and FrameReg appropriately.
Register ScratchReg = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
TII->movImm(MBB, II, DL, ScratchReg, Offset.getFixed());
- if (MI.getOpcode() == Addi) {
+ if (MIOpc == Addi) {
BuildMI(MBB, II, DL, TII->get(Add), MI.getOperand(0).getReg())
.addReg(FrameReg)
.addReg(ScratchReg, RegState::Kill);
@@ -156,6 +165,33 @@ void LoongArchRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
FrameRegIsKill = true;
}
+ // Spill CFRs.
+ if (MIOpc == LoongArch::PseudoST_CFR) {
+ Register ScratchReg = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
+ BuildMI(MBB, II, DL, TII->get(LoongArch::MOVCF2GR), ScratchReg)
+ .add(MI.getOperand(0));
+ BuildMI(MBB, II, DL, TII->get(IsLA64 ? LoongArch::ST_D : LoongArch::ST_W))
+ .addReg(ScratchReg, RegState::Kill)
+ .addReg(FrameReg)
+ .addImm(Offset.getFixed());
+ MI.eraseFromParent();
+ return;
+ }
+
+ // Reload CFRs.
+ if (MIOpc == LoongArch::PseudoLD_CFR) {
+ Register ScratchReg = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
+ BuildMI(MBB, II, DL, TII->get(IsLA64 ? LoongArch::LD_D : LoongArch::LD_W),
+ ScratchReg)
+ .addReg(FrameReg)
+ .addImm(Offset.getFixed());
+ BuildMI(MBB, II, DL, TII->get(LoongArch::MOVGR2CF))
+ .add(MI.getOperand(0))
+ .addReg(ScratchReg, RegState::Kill);
+ MI.eraseFromParent();
+ return;
+ }
+
MI.getOperand(FIOperandNum)
.ChangeToRegister(FrameReg, false, false, FrameRegIsKill);
MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset.getFixed());
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/fcmp-dbl.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/fcmp-dbl.ll
index 14fdf82319321..8058f7b0810ce 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/fcmp-dbl.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/fcmp-dbl.ll
@@ -324,7 +324,6 @@ define i1 @fcmp_fast_oeq(double %a, double %b, i1 %c) nounwind {
; LA64-NEXT: # %bb.1: # %if.then
; LA64-NEXT: ret
; LA64-NEXT: .LBB17_2: # %if.else
-; LA64-NEXT: fcmp.ceq.d $fcc0, $fa0, $fa1
; LA64-NEXT: movcf2gr $a0, $fcc0
; LA64-NEXT: ret
%cmp = fcmp fast oeq double %a, 0.000000e+00
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/fcmp-flt.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/fcmp-flt.ll
index 30803e6197919..d0f8d5342280d 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/fcmp-flt.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/fcmp-flt.ll
@@ -304,7 +304,6 @@ define i1 @fcmp_fast_oeq(float %a, float %b, i1 %c) nounwind {
; LA32-NEXT: # %bb.1: # %if.then
; LA32-NEXT: ret
; LA32-NEXT: .LBB17_2: # %if.else
-; LA32-NEXT: fcmp.ceq.s $fcc0, $fa0, $fa1
; LA32-NEXT: movcf2gr $a0, $fcc0
; LA32-NEXT: ret
;
@@ -318,7 +317,6 @@ define i1 @fcmp_fast_oeq(float %a, float %b, i1 %c) nounwind {
; LA64-NEXT: # %bb.1: # %if.then
; LA64-NEXT: ret
; LA64-NEXT: .LBB17_2: # %if.else
-; LA64-NEXT: fcmp.ceq.s $fcc0, $fa0, $fa1
; LA64-NEXT: movcf2gr $a0, $fcc0
; LA64-NEXT: ret
%cmp = fcmp fast oeq float %a, 0.000000e+00
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/float-convert.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/float-convert.ll
index a18cb6f756a60..b57d96aee32f4 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/float-convert.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/float-convert.ll
@@ -492,9 +492,9 @@ define float @convert_u32_to_float(i32 %a) nounwind {
; LA32F-NEXT: ffint.s.w $fa0, $fa0
; LA32F-NEXT: fadd.s $fa0, $fa0, $fa0
; LA32F-NEXT: slti $a1, $a0, 0
-; LA32F-NEXT: movgr2cf $fcc0, $a1
; LA32F-NEXT: movgr2fr.w $fa1, $a0
; LA32F-NEXT: ffint.s.w $fa1, $fa1
+; LA32F-NEXT: movgr2cf $fcc0, $a1
; LA32F-NEXT: fsel $fa0, $fa1, $fa0, $fcc0
; LA32F-NEXT: ret
;
@@ -570,9 +570,9 @@ define float @convert_u64_to_float(i64 %a) nounwind {
; LA64D-NEXT: ffint.s.l $fa0, $fa0
; LA64D-NEXT: fadd.s $fa0, $fa0, $fa0
; LA64D-NEXT: slti $a1, $a0, 0
-; LA64D-NEXT: movgr2cf $fcc0, $a1
; LA64D-NEXT: movgr2fr.d $fa1, $a0
; LA64D-NEXT: ffint.s.l $fa1, $fa1
+; LA64D-NEXT: movgr2cf $fcc0, $a1
; LA64D-NEXT: fsel $fa0, $fa1, $fa0, $fcc0
; LA64D-NEXT: ret
%1 = uitofp i64 %a to float
diff --git a/llvm/test/CodeGen/LoongArch/spill-reload-cfr.ll b/llvm/test/CodeGen/LoongArch/spill-reload-cfr.ll
new file mode 100644
index 0000000000000..63407ad003f69
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/spill-reload-cfr.ll
@@ -0,0 +1,89 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch32 --mattr=+d < %s | FileCheck %s --check-prefix=LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64
+
+;; Check the $fcc* register is spilled before function call and then reloaded.
+declare void @foo()
+
+define i1 @load_store_fcc_reg(float %a, i1 %c) {
+; LA32-LABEL: load_store_fcc_reg:
+; LA32: # %bb.0:
+; LA32-NEXT: addi.w $sp, $sp, -32
+; LA32-NEXT: .cfi_def_cfa_offset 32
+; LA32-NEXT: st.w $ra, $sp, 28 # 4-byte Folded Spill
+; LA32-NEXT: st.w $fp, $sp, 24 # 4-byte Folded Spill
+; LA32-NEXT: fst.d $fs0, $sp, 16 # 8-byte Folded Spill
+; LA32-NEXT: fst.d $fs1, $sp, 8 # 8-byte Folded Spill
+; LA32-NEXT: .cfi_offset 1, -4
+; LA32-NEXT: .cfi_offset 22, -8
+; LA32-NEXT: .cfi_offset 56, -16
+; LA32-NEXT: .cfi_offset 57, -24
+; LA32-NEXT: move $fp, $a0
+; LA32-NEXT: fmov.s $fs0, $fa0
+; LA32-NEXT: movgr2fr.w $fs1, $zero
+; LA32-NEXT: fcmp.cult.s $fcc0, $fs1, $fa0
+; LA32-NEXT: movcf2gr $a0, $fcc0
+; LA32-NEXT: st.w $a0, $sp, 4
+; LA32-NEXT: bl %plt(foo)
+; LA32-NEXT: ld.w $a0, $sp, 4
+; LA32-NEXT: movgr2cf $fcc0, $a0
+; LA32-NEXT: bcnez $fcc0, .LBB0_2
+; LA32-NEXT: # %bb.1: # %if.then
+; LA32-NEXT: move $a0, $fp
+; LA32-NEXT: b .LBB0_3
+; LA32-NEXT: .LBB0_2: # %if.else
+; LA32-NEXT: fcmp.cle.s $fcc0, $fs0, $fs1
+; LA32-NEXT: movcf2gr $a0, $fcc0
+; LA32-NEXT: .LBB0_3: # %if.then
+; LA32-NEXT: fld.d $fs1, $sp, 8 # 8-byte Folded Reload
+; LA32-NEXT: fld.d $fs0, $sp, 16 # 8-byte Folded Reload
+; LA32-NEXT: ld.w $fp, $sp, 24 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $ra, $sp, 28 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 32
+; LA32-NEXT: ret
+;
+; LA64-LABEL: load_store_fcc_reg:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.d $sp, $sp, -48
+; LA64-NEXT: .cfi_def_cfa_offset 48
+; LA64-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill
+; LA64-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill
+; LA64-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill
+; LA64-NEXT: fst.d $fs1, $sp, 16 # 8-byte Folded Spill
+; LA64-NEXT: .cfi_offset 1, -8
+; LA64-NEXT: .cfi_offset 22, -16
+; LA64-NEXT: .cfi_offset 56, -24
+; LA64-NEXT: .cfi_offset 57, -32
+; LA64-NEXT: move $fp, $a0
+; LA64-NEXT: fmov.s $fs0, $fa0
+; LA64-NEXT: movgr2fr.w $fs1, $zero
+; LA64-NEXT: fcmp.cult.s $fcc0, $fs1, $fa0
+; LA64-NEXT: movcf2gr $a0, $fcc0
+; LA64-NEXT: st.d $a0, $sp, 8
+; LA64-NEXT: bl %plt(foo)
+; LA64-NEXT: ld.d $a0, $sp, 8
+; LA64-NEXT: movgr2cf $fcc0, $a0
+; LA64-NEXT: bcnez $fcc0, .LBB0_2
+; LA64-NEXT: # %bb.1: # %if.then
+; LA64-NEXT: move $a0, $fp
+; LA64-NEXT: b .LBB0_3
+; LA64-NEXT: .LBB0_2: # %if.else
+; LA64-NEXT: fcmp.cle.s $fcc0, $fs0, $fs1
+; LA64-NEXT: movcf2gr $a0, $fcc0
+; LA64-NEXT: .LBB0_3: # %if.then
+; LA64-NEXT: fld.d $fs1, $sp, 16 # 8-byte Folded Reload
+; LA64-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 48
+; LA64-NEXT: ret
+ %cmp = fcmp ole float %a, 0.000000e+00
+ call void @foo()
+ br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+ ret i1 %c
+
+if.else:
+ ret i1 %cmp
+}
More information about the llvm-commits
mailing list