[llvm] [RISCV] Add 32 bit GPR sub-register for Zfinx. (PR #108336)
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 30 13:25:48 PDT 2024
https://github.com/topperc updated https://github.com/llvm/llvm-project/pull/108336
>From ec90ba0d35ae6efb73145767eee2622abec6decc Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Mon, 9 Sep 2024 13:43:48 -0700
Subject: [PATCH 1/6] [RISCV] Add 32 bit GPR sub-register for Zfinx.
This patch adds a 32-bit register class for use with Zfinx instructions.
This makes them more similar to F instructions and allows us to
spill only 32 bits.
I've added CodeGenOnly instructions for load/store using GPRF32 as that
gave better results than insert_subreg/extract_subreg. I'm using FSGNJ for
GPRF32 copy with Zfinx as that gave better results from MachineCopyPropagation.
Function arguments use this new GPRF32 register class for f32 arguments
with Zfinx, eliminating the need to use RISCVISD::FMV* nodes.
This is similar to #107446, which adds a 16-bit register class.
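To illustrate the intended effect (a sketch, not output copied from this
patch's tests): an f32 argument is now assigned directly to the 32-bit view
of a GPR (e.g. x10_w), so no fmv.w.x/fmv.x.w is needed and an f32 spill only
touches 32 bits.

    ; example.ll
    define float @add(float %a, float %b) {
      %c = fadd float %a, %b
      ret float %c
    }

    # Expected RV64 Zfinx lowering (sketch):
    add:
            fadd.s a0, a0, a1   # a0/a1 here are the x10_w/x11_w views
            ret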
---
.../Target/RISCV/AsmParser/RISCVAsmParser.cpp | 10 +
.../RISCV/Disassembler/RISCVDisassembler.cpp | 13 +
llvm/lib/Target/RISCV/RISCVCallingConv.cpp | 72 ++-
.../RISCV/RISCVDeadRegisterDefinitions.cpp | 2 +
.../Target/RISCV/RISCVExpandPseudoInsts.cpp | 21 +
llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp | 3 +
llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 20 +
llvm/lib/Target/RISCV/RISCVInstrInfoC.td | 48 ++
llvm/lib/Target/RISCV/RISCVInstrInfoF.td | 34 +-
.../Target/RISCV/RISCVMakeCompressible.cpp | 14 +
.../lib/Target/RISCV/RISCVMergeBaseOffset.cpp | 2 +
llvm/lib/Target/RISCV/RISCVRegisterInfo.td | 116 +++--
.../CodeGen/RISCV/fastcc-without-f-reg.ll | 472 ++++++++++--------
llvm/test/CodeGen/RISCV/float-arith.ll | 35 +-
.../RISCV/float-bitmanip-dagcombines.ll | 6 +-
llvm/test/CodeGen/RISCV/float-frem.ll | 7 +-
llvm/test/CodeGen/RISCV/float-imm.ll | 2 +
llvm/test/CodeGen/RISCV/float-intrinsics.ll | 66 +--
llvm/test/CodeGen/RISCV/half-intrinsics.ll | 4 +-
llvm/test/CodeGen/RISCV/kcfi-mir.ll | 4 +-
llvm/test/CodeGen/RISCV/llvm.frexp.ll | 8 +
.../CodeGen/RISCV/make-compressible-zfinx.mir | 296 +++++++++++
22 files changed, 899 insertions(+), 356 deletions(-)
create mode 100644 llvm/test/CodeGen/RISCV/make-compressible-zfinx.mir
diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index fbad7d5d02db6c..78d6d7587160a3 100644
--- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -485,8 +485,14 @@ struct RISCVOperand final : public MCParsedAsmOperand {
RISCVMCRegisterClasses[RISCV::GPRF16RegClassID].contains(Reg.RegNum);
}
+ bool isGPRF32() const {
+ return Kind == KindTy::Register &&
+ RISCVMCRegisterClasses[RISCV::GPRF32RegClassID].contains(Reg.RegNum);
+ }
+
bool isGPRAsFPR() const { return isGPR() && Reg.IsGPRAsFPR; }
bool isGPRAsFPR16() const { return isGPRF16() && Reg.IsGPRAsFPR; }
+ bool isGPRAsFPR32() const { return isGPRF32() && Reg.IsGPRAsFPR; }
bool isGPRPairAsFPR() const { return isGPRPair() && Reg.IsGPRAsFPR; }
bool isGPRPair() const {
@@ -1352,6 +1358,10 @@ unsigned RISCVAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
Op.Reg.RegNum = Reg - RISCV::X0 + RISCV::X0_H;
return Match_Success;
}
+ if (Kind == MCK_GPRAsFPR32 && Op.isGPRAsFPR()) {
+ Op.Reg.RegNum = Reg - RISCV::X0 + RISCV::X0_W;
+ return Match_Success;
+ }
// There are some GPRF64AsFPR instructions that have no RV32 equivalent. We
// reject them at parsing thinking we should match as GPRPairAsFPR for RV32.
diff --git a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
index c2659a51b02096..7c8206cb44dec2 100644
--- a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
+++ b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
@@ -94,6 +94,19 @@ static DecodeStatus DecodeGPRF16RegisterClass(MCInst &Inst, uint32_t RegNo,
return MCDisassembler::Success;
}
+static DecodeStatus DecodeGPRF32RegisterClass(MCInst &Inst, uint32_t RegNo,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
+ bool IsRVE = Decoder->getSubtargetInfo().hasFeature(RISCV::FeatureStdExtE);
+
+ if (RegNo >= 32 || (IsRVE && RegNo >= 16))
+ return MCDisassembler::Fail;
+
+ MCRegister Reg = RISCV::X0_W + RegNo;
+ Inst.addOperand(MCOperand::createReg(Reg));
+ return MCDisassembler::Success;
+}
+
static DecodeStatus DecodeGPRX1X5RegisterClass(MCInst &Inst, uint32_t RegNo,
uint64_t Address,
const MCDisassembler *Decoder) {
diff --git a/llvm/lib/Target/RISCV/RISCVCallingConv.cpp b/llvm/lib/Target/RISCV/RISCVCallingConv.cpp
index d610f0b956027a..6a96d16c5f02d5 100644
--- a/llvm/lib/Target/RISCV/RISCVCallingConv.cpp
+++ b/llvm/lib/Target/RISCV/RISCVCallingConv.cpp
@@ -156,6 +156,23 @@ static ArrayRef<MCPhysReg> getArgGPR16s(const RISCVABI::ABI ABI) {
return ArrayRef(ArgIGPRs);
}
+static ArrayRef<MCPhysReg> getArgGPR32s(const RISCVABI::ABI ABI) {
+ // The GPRs used for passing arguments in the ILP32* and LP64* ABIs, except
+ // the ILP32E/LP64E ABIs.
+ static const MCPhysReg ArgIGPRs[] = {RISCV::X10_W, RISCV::X11_W, RISCV::X12_W,
+ RISCV::X13_W, RISCV::X14_W, RISCV::X15_W,
+ RISCV::X16_W, RISCV::X17_W};
+ // The GPRs used for passing arguments in the ILP32E/LP64E ABIs.
+ static const MCPhysReg ArgEGPRs[] = {RISCV::X10_W, RISCV::X11_W,
+ RISCV::X12_W, RISCV::X13_W,
+ RISCV::X14_W, RISCV::X15_W};
+
+ if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E)
+ return ArrayRef(ArgEGPRs);
+
+ return ArrayRef(ArgIGPRs);
+}
+
static ArrayRef<MCPhysReg> getFastCCArgGPRs(const RISCVABI::ABI ABI) {
// The GPRs used for passing arguments in the FastCC, X5 and X6 might be used
// for save-restore libcall, so we don't use them.
@@ -194,6 +211,26 @@ static ArrayRef<MCPhysReg> getFastCCArgGPRF16s(const RISCVABI::ABI ABI) {
return ArrayRef(FastCCIGPRs);
}
+static ArrayRef<MCPhysReg> getFastCCArgGPRF32s(const RISCVABI::ABI ABI) {
+ // The GPRs used for passing arguments in the FastCC, X5 and X6 might be used
+ // for save-restore libcall, so we don't use them.
+ // Don't use X7 for fastcc, since Zicfilp uses X7 as the label register.
+ static const MCPhysReg FastCCIGPRs[] = {
+ RISCV::X10_W, RISCV::X11_W, RISCV::X12_W, RISCV::X13_W,
+ RISCV::X14_W, RISCV::X15_W, RISCV::X16_W, RISCV::X17_W,
+ RISCV::X28_W, RISCV::X29_W, RISCV::X30_W, RISCV::X31_W};
+
+ // The GPRs used for passing arguments in the FastCC when using ILP32E/LP64E.
+ static const MCPhysReg FastCCEGPRs[] = {RISCV::X10_W, RISCV::X11_W,
+ RISCV::X12_W, RISCV::X13_W,
+ RISCV::X14_W, RISCV::X15_W};
+
+ if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E)
+ return ArrayRef(FastCCEGPRs);
+
+ return ArrayRef(FastCCIGPRs);
+}
+
// Pass a 2*XLEN argument that has been split into two XLEN values through
// registers or the stack as necessary.
static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
@@ -364,11 +401,17 @@ bool llvm::CC_RISCV(unsigned ValNo, MVT ValVT, MVT LocVT,
}
}
+ if (ValVT == MVT::f32 && Subtarget.hasStdExtZfinx()) {
+ if (MCRegister Reg = State.AllocateReg(getArgGPR32s(ABI))) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return false;
+ }
+ }
+
ArrayRef<MCPhysReg> ArgGPRs = RISCV::getArgGPRs(ABI);
- // Zfinx/Zdinx use GPR without a bitcast when possible.
- if ((LocVT == MVT::f32 && XLen == 32 && Subtarget.hasStdExtZfinx()) ||
- (LocVT == MVT::f64 && XLen == 64 && Subtarget.hasStdExtZdinx())) {
+ // Zdinx uses GPR without a bitcast when possible.
+ if (LocVT == MVT::f64 && XLen == 64 && Subtarget.hasStdExtZdinx()) {
if (MCRegister Reg = State.AllocateReg(ArgGPRs)) {
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
return false;
@@ -616,10 +659,16 @@ bool llvm::CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT,
}
}
+ // Check if there is an available GPRF32 before hitting the stack.
+ if (LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) {
+ if (MCRegister Reg = State.AllocateReg(getFastCCArgGPRF32s(ABI))) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return false;
+ }
+ }
+
// Check if there is an available GPR before hitting the stack.
- if ((LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
- (LocVT == MVT::f64 && Subtarget.is64Bit() &&
- Subtarget.hasStdExtZdinx())) {
+ if (LocVT == MVT::f64 && Subtarget.is64Bit() && Subtarget.hasStdExtZdinx()) {
if (MCRegister Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
if (LocVT.getSizeInBits() != Subtarget.getXLen()) {
LocVT = XLenVT;
@@ -723,6 +772,17 @@ bool llvm::CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
}
}
+ if (LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) {
+ static const MCPhysReg GPR32List[] = {
+ RISCV::X9_W, RISCV::X18_W, RISCV::X19_W, RISCV::X20_W,
+ RISCV::X21_W, RISCV::X22_W, RISCV::X23_W, RISCV::X24_W,
+ RISCV::X25_W, RISCV::X26_W, RISCV::X27_W};
+ if (MCRegister Reg = State.AllocateReg(GPR32List)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return false;
+ }
+ }
+
if ((LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
(LocVT == MVT::f64 && Subtarget.hasStdExtZdinx() &&
Subtarget.is64Bit())) {
diff --git a/llvm/lib/Target/RISCV/RISCVDeadRegisterDefinitions.cpp b/llvm/lib/Target/RISCV/RISCVDeadRegisterDefinitions.cpp
index 713c7a0661defe..d913c0b201a20c 100644
--- a/llvm/lib/Target/RISCV/RISCVDeadRegisterDefinitions.cpp
+++ b/llvm/lib/Target/RISCV/RISCVDeadRegisterDefinitions.cpp
@@ -97,6 +97,8 @@ bool RISCVDeadRegisterDefinitions::runOnMachineFunction(MachineFunction &MF) {
const TargetRegisterClass *RC = TII->getRegClass(Desc, I, TRI, MF);
if (RC && RC->contains(RISCV::X0)) {
X0Reg = RISCV::X0;
+ } else if (RC && RC->contains(RISCV::X0_W)) {
+ X0Reg = RISCV::X0_W;
} else if (RC && RC->contains(RISCV::X0_H)) {
X0Reg = RISCV::X0_H;
} else {
diff --git a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
index 2501256ca6adf0..5dcec078856ead 100644
--- a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
@@ -50,6 +50,8 @@ class RISCVExpandPseudo : public MachineFunctionPass {
MachineBasicBlock::iterator MBBI, unsigned Opcode);
bool expandMV_FPR16INX(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI);
+ bool expandMV_FPR32INX(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI);
bool expandRV32ZdinxStore(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI);
bool expandRV32ZdinxLoad(MachineBasicBlock &MBB,
@@ -108,6 +110,8 @@ bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB,
switch (MBBI->getOpcode()) {
case RISCV::PseudoMV_FPR16INX:
return expandMV_FPR16INX(MBB, MBBI);
+ case RISCV::PseudoMV_FPR32INX:
+ return expandMV_FPR32INX(MBB, MBBI);
case RISCV::PseudoRV32ZdinxSD:
return expandRV32ZdinxStore(MBB, MBBI);
case RISCV::PseudoRV32ZdinxLD:
@@ -287,6 +291,23 @@ bool RISCVExpandPseudo::expandMV_FPR16INX(MachineBasicBlock &MBB,
return true;
}
+bool RISCVExpandPseudo::expandMV_FPR32INX(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) {
+ DebugLoc DL = MBBI->getDebugLoc();
+ const TargetRegisterInfo *TRI = STI->getRegisterInfo();
+ Register DstReg = TRI->getMatchingSuperReg(
+ MBBI->getOperand(0).getReg(), RISCV::sub_32, &RISCV::GPRRegClass);
+ Register SrcReg = TRI->getMatchingSuperReg(
+ MBBI->getOperand(1).getReg(), RISCV::sub_32, &RISCV::GPRRegClass);
+
+ BuildMI(MBB, MBBI, DL, TII->get(RISCV::ADDI), DstReg)
+ .addReg(SrcReg, getKillRegState(MBBI->getOperand(1).isKill()))
+ .addImm(0);
+
+ MBBI->eraseFromParent(); // The pseudo instruction is gone now.
+ return true;
+}
+
// This function expands the PseudoRV32ZdinxSD for storing a double-precision
// floating-point value into memory by generating an equivalent instruction
// sequence for RV32.
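As a sketch of the PseudoMV_FPR32INX expansion added above (register choices
hypothetical), a GPRF32 copy is rewritten onto the matching full-width
super-registers:

    Before expansion (MIR sketch):
      $x11_w = PseudoMV_FPR32INX $x12_w
    After expansion (same copy on the sub_32 super-registers):
      $x11 = ADDI $x12, 0        (assembles as "mv a1, a2")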
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 23479c2edf1d91..3e3f3c2eca1468 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -931,6 +931,9 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
if (VT.SimpleTy == MVT::f16 && Opc == RISCV::COPY) {
Res =
CurDAG->getTargetExtractSubreg(RISCV::sub_16, DL, VT, Imm).getNode();
+ } else if (VT.SimpleTy == MVT::f32 && Opc == RISCV::COPY) {
+ Res =
+ CurDAG->getTargetExtractSubreg(RISCV::sub_32, DL, VT, Imm).getNode();
} else if (Opc == RISCV::FCVT_D_W_IN32X || Opc == RISCV::FCVT_D_W)
Res = CurDAG->getMachineNode(
Opc, DL, VT, Imm,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index f0295d289ed86a..204e14f40cf6c0 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -110,6 +110,7 @@ Register RISCVInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
MemBytes = 2;
break;
case RISCV::LW:
+ case RISCV::LW_INX:
case RISCV::FLW:
case RISCV::LWU:
MemBytes = 4;
@@ -150,6 +151,7 @@ Register RISCVInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
MemBytes = 2;
break;
case RISCV::SW:
+ case RISCV::SW_INX:
case RISCV::FSW:
MemBytes = 4;
break;
@@ -471,6 +473,13 @@ void RISCVInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
return;
}
+ if (RISCV::GPRF32RegClass.contains(DstReg, SrcReg)) {
+ BuildMI(MBB, MBBI, DL, get(RISCV::PseudoMV_FPR32INX), DstReg)
+ .addReg(SrcReg,
+ getKillRegState(KillSrc) | getRenamableRegState(RenamableSrc));
+ return;
+ }
+
if (RISCV::GPRPairRegClass.contains(DstReg, SrcReg)) {
// Emit an ADDI for both parts of GPRPair.
BuildMI(MBB, MBBI, DL, get(RISCV::ADDI),
@@ -595,6 +604,9 @@ void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
} else if (RISCV::GPRF16RegClass.hasSubClassEq(RC)) {
Opcode = RISCV::SH_INX;
IsScalableVector = false;
+ } else if (RISCV::GPRF32RegClass.hasSubClassEq(RC)) {
+ Opcode = RISCV::SW_INX;
+ IsScalableVector = false;
} else if (RISCV::GPRPairRegClass.hasSubClassEq(RC)) {
Opcode = RISCV::PseudoRV32ZdinxSD;
IsScalableVector = false;
@@ -681,6 +693,9 @@ void RISCVInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
} else if (RISCV::GPRF16RegClass.hasSubClassEq(RC)) {
Opcode = RISCV::LH_INX;
IsScalableVector = false;
+ } else if (RISCV::GPRF32RegClass.hasSubClassEq(RC)) {
+ Opcode = RISCV::LW_INX;
+ IsScalableVector = false;
} else if (RISCV::GPRPairRegClass.hasSubClassEq(RC)) {
Opcode = RISCV::PseudoRV32ZdinxLD;
IsScalableVector = false;
@@ -1554,6 +1569,7 @@ unsigned RISCVInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
switch (Opcode) {
case RISCV::PseudoMV_FPR16INX:
+ case RISCV::PseudoMV_FPR32INX:
// MV is always compressible to either c.mv or c.li rd, 0.
return STI.hasStdExtCOrZca() ? 2 : 4;
case TargetOpcode::STACKMAP:
@@ -2614,6 +2630,7 @@ bool RISCVInstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg,
case RISCV::LH_INX:
case RISCV::LHU:
case RISCV::LW:
+ case RISCV::LW_INX:
case RISCV::LWU:
case RISCV::LD:
case RISCV::FLH:
@@ -2623,6 +2640,7 @@ bool RISCVInstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg,
case RISCV::SH:
case RISCV::SH_INX:
case RISCV::SW:
+ case RISCV::SW_INX:
case RISCV::SD:
case RISCV::FSH:
case RISCV::FSW:
@@ -2692,9 +2710,11 @@ bool RISCVInstrInfo::getMemOperandsWithOffsetWidth(
case RISCV::SH_INX:
case RISCV::FSH:
case RISCV::LW:
+ case RISCV::LW_INX:
case RISCV::LWU:
case RISCV::FLW:
case RISCV::SW:
+ case RISCV::SW_INX:
case RISCV::FSW:
case RISCV::LD:
case RISCV::FLD:
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
index 3f279b7a58ca68..7d742322b42969 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
@@ -331,6 +331,15 @@ def C_LW : CLoad_ri<0b010, "c.lw", GPRC, uimm7_lsb00>,
let Inst{5} = imm{6};
}
+let isCodeGenOnly = 1 in
+def C_LW_INX : CLoad_ri<0b010, "c.lw", GPRF32C, uimm7_lsb00>,
+ Sched<[WriteLDW, ReadMemBase]> {
+ bits<7> imm;
+ let Inst{12-10} = imm{5-3};
+ let Inst{6} = imm{2};
+ let Inst{5} = imm{6};
+}
+
let DecoderNamespace = "RISCV32Only_",
Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] in
def C_FLW : CLoad_ri<0b011, "c.flw", FPR32C, uimm7_lsb00>,
@@ -365,6 +374,15 @@ def C_SW : CStore_rri<0b110, "c.sw", GPRC, uimm7_lsb00>,
let Inst{5} = imm{6};
}
+let isCodeGenOnly = 1 in
+def C_SW_INX : CStore_rri<0b110, "c.sw", GPRF32C, uimm7_lsb00>,
+ Sched<[WriteSTW, ReadStoreData, ReadMemBase]> {
+ bits<7> imm;
+ let Inst{12-10} = imm{5-3};
+ let Inst{6} = imm{2};
+ let Inst{5} = imm{6};
+}
+
let DecoderNamespace = "RISCV32Only_",
Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] in
def C_FSW : CStore_rri<0b111, "c.fsw", FPR32C, uimm7_lsb00>,
@@ -517,6 +535,13 @@ def C_LWSP : CStackLoad<0b010, "c.lwsp", GPRNoX0, uimm8_lsb00>,
let Inst{3-2} = imm{7-6};
}
+let isCodeGenOnly = 1 in
+def C_LWSP_INX : CStackLoad<0b010, "c.lwsp", GPRF32NoX0, uimm8_lsb00>,
+ Sched<[WriteLDW, ReadMemBase]> {
+ let Inst{6-4} = imm{4-2};
+ let Inst{3-2} = imm{7-6};
+}
+
let DecoderNamespace = "RISCV32Only_",
Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] in
def C_FLWSP : CStackLoad<0b011, "c.flwsp", FPR32, uimm8_lsb00>,
@@ -575,6 +600,13 @@ def C_SWSP : CStackStore<0b110, "c.swsp", GPR, uimm8_lsb00>,
let Inst{8-7} = imm{7-6};
}
+let isCodeGenOnly = 1 in
+def C_SWSP_INX : CStackStore<0b110, "c.swsp", GPRF32, uimm8_lsb00>,
+ Sched<[WriteSTW, ReadStoreData, ReadMemBase]> {
+ let Inst{12-9} = imm{5-2};
+ let Inst{8-7} = imm{7-6};
+}
+
let DecoderNamespace = "RISCV32Only_",
Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] in
def C_FSWSP : CStackStore<0b111, "c.fswsp", FPR32, uimm8_lsb00>,
@@ -869,6 +901,10 @@ def : CompressPat<(FLD FPR64C:$rd, GPRCMem:$rs1, uimm8_lsb000:$imm),
let Predicates = [HasStdExtCOrZca] in {
def : CompressPat<(LW GPRC:$rd, GPRCMem:$rs1, uimm7_lsb00:$imm),
(C_LW GPRC:$rd, GPRCMem:$rs1, uimm7_lsb00:$imm)>;
+
+let isCompressOnly = true in
+def : CompressPat<(LW_INX GPRF32C:$rd, GPRCMem:$rs1, uimm7_lsb00:$imm),
+ (C_LW_INX GPRF32C:$rd, GPRCMem:$rs1, uimm7_lsb00:$imm)>;
} // Predicates = [HasStdExtCOrZca]
let Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] in {
@@ -889,6 +925,10 @@ def : CompressPat<(FSD FPR64C:$rs2, GPRCMem:$rs1, uimm8_lsb000:$imm),
let Predicates = [HasStdExtCOrZca] in {
def : CompressPat<(SW GPRC:$rs2, GPRCMem:$rs1, uimm7_lsb00:$imm),
(C_SW GPRC:$rs2, GPRCMem:$rs1, uimm7_lsb00:$imm)>;
+
+let isCompressOnly = true in
+def : CompressPat<(SW_INX GPRF32C:$rs2, GPRCMem:$rs1, uimm7_lsb00:$imm),
+ (C_SW_INX GPRF32C:$rs2, GPRCMem:$rs1, uimm7_lsb00:$imm)>;
} // Predicates = [HasStdExtCOrZca]
let Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] in {
@@ -992,6 +1032,10 @@ def : CompressPat<(FLD FPR64:$rd, SPMem:$rs1, uimm9_lsb000:$imm),
let Predicates = [HasStdExtCOrZca] in {
def : CompressPat<(LW GPRNoX0:$rd, SPMem:$rs1, uimm8_lsb00:$imm),
(C_LWSP GPRNoX0:$rd, SPMem:$rs1, uimm8_lsb00:$imm)>;
+
+let isCompressOnly = true in
+def : CompressPat<(LW_INX GPRF32NoX0:$rd, SPMem:$rs1, uimm8_lsb00:$imm),
+ (C_LWSP_INX GPRF32NoX0:$rd, SPMem:$rs1, uimm8_lsb00:$imm)>;
} // Predicates = [HasStdExtCOrZca]
let Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] in {
@@ -1034,6 +1078,10 @@ def : CompressPat<(FSD FPR64:$rs2, SPMem:$rs1, uimm9_lsb000:$imm),
let Predicates = [HasStdExtCOrZca] in {
def : CompressPat<(SW GPR:$rs2, SPMem:$rs1, uimm8_lsb00:$imm),
(C_SWSP GPR:$rs2, SPMem:$rs1, uimm8_lsb00:$imm)>;
+
+let isCompressOnly = true in
+def : CompressPat<(SW_INX GPRF32:$rs2, SPMem:$rs1, uimm8_lsb00:$imm),
+ (C_SWSP_INX GPRF32:$rs2, SPMem:$rs1, uimm8_lsb00:$imm)>;
} // Predicates = [HasStdExtCOrZca]
let Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] in {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
index a00acb372dc2a2..1146637f106e7a 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
@@ -83,15 +83,14 @@ def any_fma_nsz : PatFrag<(ops node:$rs1, node:$rs2, node:$rs3),
// Zfinx
-def GPRAsFPR : AsmOperandClass {
- let Name = "GPRAsFPR";
+def GPRAsFPR32 : AsmOperandClass {
+ let Name = "GPRAsFPR32";
let ParserMethod = "parseGPRAsFPR";
let RenderMethod = "addRegOperands";
}
def FPR32INX : RegisterOperand<GPRF32> {
- let ParserMatchClass = GPRAsFPR;
- let DecoderMethod = "DecodeGPRRegisterClass";
+ let ParserMatchClass = GPRAsFPR32;
}
// Describes a combination of predicates from F/D/Zfh/Zfhmin or
@@ -306,6 +305,19 @@ def FLW : FPLoad_r<0b010, "flw", FPR32, WriteFLD32>;
def FSW : FPStore_r<0b010, "fsw", FPR32, WriteFST32>;
} // Predicates = [HasStdExtF]
+let Predicates = [HasStdExtZfinx], isCodeGenOnly = 1 in {
+def LW_INX : Load_ri<0b010, "lw", GPRF32>, Sched<[WriteLDW, ReadMemBase]>;
+def SW_INX : Store_rri<0b010, "sw", GPRF32>,
+ Sched<[WriteSTW, ReadStoreData, ReadMemBase]>;
+
+// ADDI with the GPRF32 register class to use for copy. This should not be
+// used as a general ADDI, so the immediate should always be zero.
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveReg = 1,
+ hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+def PseudoMV_FPR32INX : Pseudo<(outs GPRF32:$rd), (ins GPRF32:$rs), []>,
+ Sched<[WriteIALU, ReadIALU]>;
+}
+
foreach Ext = FExts in {
let SchedRW = [WriteFMA32, ReadFMA32, ReadFMA32, ReadFMA32Addend] in {
defm FMADD_S : FPFMA_rrr_frm_m<OPC_MADD, 0b00, "fmadd.s", Ext>;
@@ -685,12 +697,10 @@ defm Select_FPR32INX : SelectCC_GPR_rrirr<FPR32INX, f32>;
def PseudoFROUND_S_INX : PseudoFROUND<FPR32INX, f32>;
/// Loads
-def : Pat<(f32 (load (AddrRegImm (XLenVT GPR:$rs1), simm12:$imm12))),
- (COPY_TO_REGCLASS (LW GPR:$rs1, simm12:$imm12), GPRF32)>;
+def : LdPat<load, LW_INX, f32>;
/// Stores
-def : Pat<(store (f32 FPR32INX:$rs2), (AddrRegImm (XLenVT GPR:$rs1), simm12:$imm12)),
- (SW (COPY_TO_REGCLASS FPR32INX:$rs2, GPR), GPR:$rs1, simm12:$imm12)>;
+def : StPat<store, SW_INX, GPRF32, f32>;
} // Predicates = [HasStdExtZfinx]
let Predicates = [HasStdExtF] in {
@@ -701,8 +711,8 @@ def : Pat<(i32 (bitconvert FPR32:$rs1)), (FMV_X_W FPR32:$rs1)>;
let Predicates = [HasStdExtZfinx] in {
// Moves (no conversion)
-def : Pat<(f32 (bitconvert (i32 GPR:$rs1))), (COPY_TO_REGCLASS GPR:$rs1, GPRF32)>;
-def : Pat<(i32 (bitconvert FPR32INX:$rs1)), (COPY_TO_REGCLASS FPR32INX:$rs1, GPR)>;
+def : Pat<(f32 (bitconvert (i32 GPR:$rs1))), (EXTRACT_SUBREG GPR:$rs1, sub_32)>;
+def : Pat<(i32 (bitconvert FPR32INX:$rs1)), (INSERT_SUBREG (XLenVT (IMPLICIT_DEF)), FPR32INX:$rs1, sub_32)>;
} // Predicates = [HasStdExtZfinx]
let Predicates = [HasStdExtF] in {
@@ -781,8 +791,8 @@ def : Pat<(any_uint_to_fp (i64 GPR:$rs1)), (FCVT_S_LU $rs1, FRM_DYN)>;
let Predicates = [HasStdExtZfinx, IsRV64] in {
// Moves (no conversion)
-def : Pat<(riscv_fmv_w_x_rv64 GPR:$src), (COPY_TO_REGCLASS GPR:$src, GPRF32)>;
-def : Pat<(riscv_fmv_x_anyextw_rv64 GPRF32:$src), (COPY_TO_REGCLASS GPRF32:$src, GPR)>;
+def : Pat<(riscv_fmv_w_x_rv64 GPR:$src), (EXTRACT_SUBREG GPR:$src, sub_32)>;
+def : Pat<(riscv_fmv_x_anyextw_rv64 GPRF32:$src), (INSERT_SUBREG (XLenVT (IMPLICIT_DEF)), GPRF32:$src, sub_32)>;
// Use target specific isd nodes to help us remember the result is sign
// extended. Matching sext_inreg+fptoui/fptosi may cause the conversion to be
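With the pattern change above, an i32<->f32 bitcast is modeled as a
sub-register insert/extract instead of a cross-class copy, which register
coalescing can normally fold away. A sketch (assuming the copy coalesces):

    define float @cast(i32 %x) {
      %f = bitcast i32 %x to float
      ret float %f
    }

    # Expected RV32 Zfinx lowering (sketch): the value never leaves x10, so
    # nothing beyond the return (plus perhaps a "# kill" liveness comment)
    # is emitted:
    cast:
            ret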
diff --git a/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp b/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp
index 5973e5bf2e5252..df5501e37f8313 100644
--- a/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp
+++ b/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp
@@ -109,7 +109,9 @@ static unsigned log2LdstWidth(unsigned Opcode) {
case RISCV::SH_INX:
return 1;
case RISCV::LW:
+ case RISCV::LW_INX:
case RISCV::SW:
+ case RISCV::SW_INX:
case RISCV::FLW:
case RISCV::FSW:
return 2;
@@ -136,7 +138,9 @@ static unsigned offsetMask(unsigned Opcode) {
case RISCV::SH_INX:
return maskTrailingOnes<unsigned>(1U);
case RISCV::LW:
+ case RISCV::LW_INX:
case RISCV::SW:
+ case RISCV::SW_INX:
case RISCV::FLW:
case RISCV::FSW:
case RISCV::LD:
@@ -178,6 +182,7 @@ static int64_t getBaseAdjustForCompression(int64_t Offset, unsigned Opcode) {
static bool isCompressedReg(Register Reg) {
return RISCV::GPRCRegClass.contains(Reg) ||
RISCV::GPRF16CRegClass.contains(Reg) ||
+ RISCV::GPRF32CRegClass.contains(Reg) ||
RISCV::FPR32CRegClass.contains(Reg) ||
RISCV::FPR64CRegClass.contains(Reg);
}
@@ -195,6 +200,7 @@ static bool isCompressibleLoad(const MachineInstr &MI) {
case RISCV::LHU:
return STI.hasStdExtZcb();
case RISCV::LW:
+ case RISCV::LW_INX:
case RISCV::LD:
return STI.hasStdExtCOrZca();
case RISCV::FLW:
@@ -216,6 +222,7 @@ static bool isCompressibleStore(const MachineInstr &MI) {
case RISCV::SH_INX:
return STI.hasStdExtZcb();
case RISCV::SW:
+ case RISCV::SW_INX:
case RISCV::SD:
return STI.hasStdExtCOrZca();
case RISCV::FSW:
@@ -329,6 +336,8 @@ static Register analyzeCompressibleUses(MachineInstr &FirstMI,
RCToScavenge = &RISCV::GPRCRegClass;
else if (RISCV::GPRF16RegClass.contains(RegImm.Reg))
RCToScavenge = &RISCV::GPRF16CRegClass;
+ else if (RISCV::GPRF32RegClass.contains(RegImm.Reg))
+ RCToScavenge = &RISCV::GPRF32CRegClass;
else if (RISCV::FPR32RegClass.contains(RegImm.Reg))
RCToScavenge = &RISCV::FPR32CRegClass;
else if (RISCV::FPR64RegClass.contains(RegImm.Reg))
@@ -424,6 +433,11 @@ bool RISCVMakeCompressibleOpt::runOnMachineFunction(MachineFunction &Fn) {
BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(RISCV::PseudoMV_FPR16INX),
NewReg)
.addReg(RegImm.Reg);
+ } else if (RISCV::GPRF32RegClass.contains(RegImm.Reg)) {
+ assert(RegImm.Imm == 0);
+ BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(RISCV::PseudoMV_FPR32INX),
+ NewReg)
+ .addReg(RegImm.Reg);
} else {
// If we are looking at replacing an FPR register we don't expect to
// have any offset. The only compressible FP instructions with an offset
diff --git a/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp b/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
index b3a2877edde4e3..a324deb4e48f5c 100644
--- a/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
+++ b/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
@@ -387,6 +387,7 @@ bool RISCVMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi,
case RISCV::LH:
case RISCV::LH_INX:
case RISCV::LW:
+ case RISCV::LW_INX:
case RISCV::LBU:
case RISCV::LHU:
case RISCV::LWU:
@@ -398,6 +399,7 @@ bool RISCVMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi,
case RISCV::SH:
case RISCV::SH_INX:
case RISCV::SW:
+ case RISCV::SW_INX:
case RISCV::SD:
case RISCV::FSH:
case RISCV::FSW:
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
index 9cb589f2441a21..1c5cdea1978efd 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
@@ -124,41 +124,81 @@ let RegAltNameIndices = [ABIRegAltName] in {
let SubRegIndices = [sub_16] in {
let isConstant = true in
- def X0 : RISCVRegWithSubRegs<0, "x0", [X0_H], ["zero"]>, DwarfRegNum<[0]>;
+ def X0_W : RISCVRegWithSubRegs<0, "x0", [X0_H], ["zero"]>;
let CostPerUse = [0, 1] in {
- def X1 : RISCVRegWithSubRegs<1, "x1", [X1_H], ["ra"]>, DwarfRegNum<[1]>;
- def X2 : RISCVRegWithSubRegs<2, "x2", [X2_H], ["sp"]>, DwarfRegNum<[2]>;
- def X3 : RISCVRegWithSubRegs<3, "x3", [X3_H], ["gp"]>, DwarfRegNum<[3]>;
- def X4 : RISCVRegWithSubRegs<4, "x4", [X4_H], ["tp"]>, DwarfRegNum<[4]>;
- def X5 : RISCVRegWithSubRegs<5, "x5", [X5_H], ["t0"]>, DwarfRegNum<[5]>;
- def X6 : RISCVRegWithSubRegs<6, "x6", [X6_H], ["t1"]>, DwarfRegNum<[6]>;
- def X7 : RISCVRegWithSubRegs<7, "x7", [X7_H], ["t2"]>, DwarfRegNum<[7]>;
+ def X1_W : RISCVRegWithSubRegs<1, "x1", [X1_H], ["ra"]>;
+ def X2_W : RISCVRegWithSubRegs<2, "x2", [X2_H], ["sp"]>;
+ def X3_W : RISCVRegWithSubRegs<3, "x3", [X3_H], ["gp"]>;
+ def X4_W : RISCVRegWithSubRegs<4, "x4", [X4_H], ["tp"]>;
+ def X5_W : RISCVRegWithSubRegs<5, "x5", [X5_H], ["t0"]>;
+ def X6_W : RISCVRegWithSubRegs<6, "x6", [X6_H], ["t1"]>;
+ def X7_W : RISCVRegWithSubRegs<7, "x7", [X7_H], ["t2"]>;
}
- def X8 : RISCVRegWithSubRegs<8, "x8", [X8_H], ["s0", "fp"]>, DwarfRegNum<[8]>;
- def X9 : RISCVRegWithSubRegs<9, "x9", [X9_H], ["s1"]>, DwarfRegNum<[9]>;
- def X10 : RISCVRegWithSubRegs<10,"x10", [X10_H], ["a0"]>, DwarfRegNum<[10]>;
- def X11 : RISCVRegWithSubRegs<11,"x11", [X11_H], ["a1"]>, DwarfRegNum<[11]>;
- def X12 : RISCVRegWithSubRegs<12,"x12", [X12_H], ["a2"]>, DwarfRegNum<[12]>;
- def X13 : RISCVRegWithSubRegs<13,"x13", [X13_H], ["a3"]>, DwarfRegNum<[13]>;
- def X14 : RISCVRegWithSubRegs<14,"x14", [X14_H], ["a4"]>, DwarfRegNum<[14]>;
- def X15 : RISCVRegWithSubRegs<15,"x15", [X15_H], ["a5"]>, DwarfRegNum<[15]>;
+ def X8_W : RISCVRegWithSubRegs<8, "x8", [X8_H], ["s0", "fp"]>;
+ def X9_W : RISCVRegWithSubRegs<9, "x9", [X9_H], ["s1"]>;
+ def X10_W : RISCVRegWithSubRegs<10,"x10", [X10_H], ["a0"]>;
+ def X11_W : RISCVRegWithSubRegs<11,"x11", [X11_H], ["a1"]>;
+ def X12_W : RISCVRegWithSubRegs<12,"x12", [X12_H], ["a2"]>;
+ def X13_W : RISCVRegWithSubRegs<13,"x13", [X13_H], ["a3"]>;
+ def X14_W : RISCVRegWithSubRegs<14,"x14", [X14_H], ["a4"]>;
+ def X15_W : RISCVRegWithSubRegs<15,"x15", [X15_H], ["a5"]>;
let CostPerUse = [0, 1] in {
- def X16 : RISCVRegWithSubRegs<16,"x16", [X16_H], ["a6"]>, DwarfRegNum<[16]>;
- def X17 : RISCVRegWithSubRegs<17,"x17", [X17_H], ["a7"]>, DwarfRegNum<[17]>;
- def X18 : RISCVRegWithSubRegs<18,"x18", [X18_H], ["s2"]>, DwarfRegNum<[18]>;
- def X19 : RISCVRegWithSubRegs<19,"x19", [X19_H], ["s3"]>, DwarfRegNum<[19]>;
- def X20 : RISCVRegWithSubRegs<20,"x20", [X20_H], ["s4"]>, DwarfRegNum<[20]>;
- def X21 : RISCVRegWithSubRegs<21,"x21", [X21_H], ["s5"]>, DwarfRegNum<[21]>;
- def X22 : RISCVRegWithSubRegs<22,"x22", [X22_H], ["s6"]>, DwarfRegNum<[22]>;
- def X23 : RISCVRegWithSubRegs<23,"x23", [X23_H], ["s7"]>, DwarfRegNum<[23]>;
- def X24 : RISCVRegWithSubRegs<24,"x24", [X24_H], ["s8"]>, DwarfRegNum<[24]>;
- def X25 : RISCVRegWithSubRegs<25,"x25", [X25_H], ["s9"]>, DwarfRegNum<[25]>;
- def X26 : RISCVRegWithSubRegs<26,"x26", [X26_H], ["s10"]>, DwarfRegNum<[26]>;
- def X27 : RISCVRegWithSubRegs<27,"x27", [X27_H], ["s11"]>, DwarfRegNum<[27]>;
- def X28 : RISCVRegWithSubRegs<28,"x28", [X28_H], ["t3"]>, DwarfRegNum<[28]>;
- def X29 : RISCVRegWithSubRegs<29,"x29", [X29_H], ["t4"]>, DwarfRegNum<[29]>;
- def X30 : RISCVRegWithSubRegs<30,"x30", [X30_H], ["t5"]>, DwarfRegNum<[30]>;
- def X31 : RISCVRegWithSubRegs<31,"x31", [X31_H], ["t6"]>, DwarfRegNum<[31]>;
+ def X16_W : RISCVRegWithSubRegs<16,"x16", [X16_H], ["a6"]>;
+ def X17_W : RISCVRegWithSubRegs<17,"x17", [X17_H], ["a7"]>;
+ def X18_W : RISCVRegWithSubRegs<18,"x18", [X18_H], ["s2"]>;
+ def X19_W : RISCVRegWithSubRegs<19,"x19", [X19_H], ["s3"]>;
+ def X20_W : RISCVRegWithSubRegs<20,"x20", [X20_H], ["s4"]>;
+ def X21_W : RISCVRegWithSubRegs<21,"x21", [X21_H], ["s5"]>;
+ def X22_W : RISCVRegWithSubRegs<22,"x22", [X22_H], ["s6"]>;
+ def X23_W : RISCVRegWithSubRegs<23,"x23", [X23_H], ["s7"]>;
+ def X24_W : RISCVRegWithSubRegs<24,"x24", [X24_H], ["s8"]>;
+ def X25_W : RISCVRegWithSubRegs<25,"x25", [X25_H], ["s9"]>;
+ def X26_W : RISCVRegWithSubRegs<26,"x26", [X26_H], ["s10"]>;
+ def X27_W : RISCVRegWithSubRegs<27,"x27", [X27_H], ["s11"]>;
+ def X28_W : RISCVRegWithSubRegs<28,"x28", [X28_H], ["t3"]>;
+ def X29_W : RISCVRegWithSubRegs<29,"x29", [X29_H], ["t4"]>;
+ def X30_W : RISCVRegWithSubRegs<30,"x30", [X30_H], ["t5"]>;
+ def X31_W : RISCVRegWithSubRegs<31,"x31", [X31_H], ["t6"]>;
+ }
+ }
+
+ let SubRegIndices = [sub_32] in {
+ let isConstant = true in
+ def X0 : RISCVRegWithSubRegs<0, "x0", [X0_W], ["zero"]>, DwarfRegNum<[0]>;
+ let CostPerUse = [0, 1] in {
+ def X1 : RISCVRegWithSubRegs<1, "x1", [X1_W], ["ra"]>, DwarfRegNum<[1]>;
+ def X2 : RISCVRegWithSubRegs<2, "x2", [X2_W], ["sp"]>, DwarfRegNum<[2]>;
+ def X3 : RISCVRegWithSubRegs<3, "x3", [X3_W], ["gp"]>, DwarfRegNum<[3]>;
+ def X4 : RISCVRegWithSubRegs<4, "x4", [X4_W], ["tp"]>, DwarfRegNum<[4]>;
+ def X5 : RISCVRegWithSubRegs<5, "x5", [X5_W], ["t0"]>, DwarfRegNum<[5]>;
+ def X6 : RISCVRegWithSubRegs<6, "x6", [X6_W], ["t1"]>, DwarfRegNum<[6]>;
+ def X7 : RISCVRegWithSubRegs<7, "x7", [X7_W], ["t2"]>, DwarfRegNum<[7]>;
+ }
+ def X8 : RISCVRegWithSubRegs<8, "x8", [X8_W], ["s0", "fp"]>, DwarfRegNum<[8]>;
+ def X9 : RISCVRegWithSubRegs<9, "x9", [X9_W], ["s1"]>, DwarfRegNum<[9]>;
+ def X10 : RISCVRegWithSubRegs<10,"x10", [X10_W], ["a0"]>, DwarfRegNum<[10]>;
+ def X11 : RISCVRegWithSubRegs<11,"x11", [X11_W], ["a1"]>, DwarfRegNum<[11]>;
+ def X12 : RISCVRegWithSubRegs<12,"x12", [X12_W], ["a2"]>, DwarfRegNum<[12]>;
+ def X13 : RISCVRegWithSubRegs<13,"x13", [X13_W], ["a3"]>, DwarfRegNum<[13]>;
+ def X14 : RISCVRegWithSubRegs<14,"x14", [X14_W], ["a4"]>, DwarfRegNum<[14]>;
+ def X15 : RISCVRegWithSubRegs<15,"x15", [X15_W], ["a5"]>, DwarfRegNum<[15]>;
+ let CostPerUse = [0, 1] in {
+ def X16 : RISCVRegWithSubRegs<16,"x16", [X16_W], ["a6"]>, DwarfRegNum<[16]>;
+ def X17 : RISCVRegWithSubRegs<17,"x17", [X17_W], ["a7"]>, DwarfRegNum<[17]>;
+ def X18 : RISCVRegWithSubRegs<18,"x18", [X18_W], ["s2"]>, DwarfRegNum<[18]>;
+ def X19 : RISCVRegWithSubRegs<19,"x19", [X19_W], ["s3"]>, DwarfRegNum<[19]>;
+ def X20 : RISCVRegWithSubRegs<20,"x20", [X20_W], ["s4"]>, DwarfRegNum<[20]>;
+ def X21 : RISCVRegWithSubRegs<21,"x21", [X21_W], ["s5"]>, DwarfRegNum<[21]>;
+ def X22 : RISCVRegWithSubRegs<22,"x22", [X22_W], ["s6"]>, DwarfRegNum<[22]>;
+ def X23 : RISCVRegWithSubRegs<23,"x23", [X23_W], ["s7"]>, DwarfRegNum<[23]>;
+ def X24 : RISCVRegWithSubRegs<24,"x24", [X24_W], ["s8"]>, DwarfRegNum<[24]>;
+ def X25 : RISCVRegWithSubRegs<25,"x25", [X25_W], ["s9"]>, DwarfRegNum<[25]>;
+ def X26 : RISCVRegWithSubRegs<26,"x26", [X26_W], ["s10"]>, DwarfRegNum<[26]>;
+ def X27 : RISCVRegWithSubRegs<27,"x27", [X27_W], ["s11"]>, DwarfRegNum<[27]>;
+ def X28 : RISCVRegWithSubRegs<28,"x28", [X28_W], ["t3"]>, DwarfRegNum<[28]>;
+ def X29 : RISCVRegWithSubRegs<29,"x29", [X29_W], ["t4"]>, DwarfRegNum<[29]>;
+ def X30 : RISCVRegWithSubRegs<30,"x30", [X30_W], ["t5"]>, DwarfRegNum<[30]>;
+ def X31 : RISCVRegWithSubRegs<31,"x31", [X31_W], ["t6"]>, DwarfRegNum<[31]>;
}
}
}
@@ -617,9 +657,15 @@ def GPRF16 : RISCVRegisterClass<[f16], 16, (add (sequence "X%u_H", 10, 17),
def GPRF16C : RISCVRegisterClass<[f16], 16, (add (sequence "X%u_H", 10, 15),
(sequence "X%u_H", 8, 9))>;
-let RegInfos = XLenRI in {
-def GPRF32 : RISCVRegisterClass<[f32], 32, (add GPR)>;
-} // RegInfos = XLenRI
+def GPRF32 : RISCVRegisterClass<[f32], 32, (add (sequence "X%u_W", 10, 17),
+ (sequence "X%u_W", 5, 7),
+ (sequence "X%u_W", 28, 31),
+ (sequence "X%u_W", 8, 9),
+ (sequence "X%u_W", 18, 27),
+ (sequence "X%u_W", 0, 4))>;
+def GPRF32C : RISCVRegisterClass<[f32], 32, (add (sequence "X%u_W", 10, 15),
+ (sequence "X%u_W", 8, 9))>;
+def GPRF32NoX0 : RISCVRegisterClass<[f32], 32, (sub GPRF32, X0_W)>;
// Dummy zero register for use in the register pair containing X0 (as X1 is
// not read to or written when the X0 register pair is used).
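For reference, the resulting sub-register nesting per GPR (an illustration of
the definitions above, using X10):

    X10  --sub_32-->  X10_W (f32)  --sub_16-->  X10_H (f16)

DwarfRegNum stays on the full-width X* definitions, so debug-info register
numbering is unchanged.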
diff --git a/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll b/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll
index de5bb8a30db16b..0eefc34ad552a9 100644
--- a/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll
+++ b/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll
@@ -23,26 +23,34 @@ define half @caller_half(half %x) nounwind {
;
; ZFINX32-LABEL: caller_half:
; ZFINX32: # %bb.0: # %entry
+; ZFINX32-NEXT: # kill: def $x10_w killed $x10_w def $x10
; ZFINX32-NEXT: lui a1, 1048560
; ZFINX32-NEXT: or a0, a0, a1
+; ZFINX32-NEXT: # kill: def $x10_w killed $x10_w killed $x10
; ZFINX32-NEXT: tail h
;
; ZFINX64-LABEL: caller_half:
; ZFINX64: # %bb.0: # %entry
+; ZFINX64-NEXT: # kill: def $x10_w killed $x10_w def $x10
; ZFINX64-NEXT: lui a1, 1048560
; ZFINX64-NEXT: or a0, a0, a1
+; ZFINX64-NEXT: # kill: def $x10_w killed $x10_w killed $x10
; ZFINX64-NEXT: tail h
;
; ZDINX32-LABEL: caller_half:
; ZDINX32: # %bb.0: # %entry
+; ZDINX32-NEXT: # kill: def $x10_w killed $x10_w def $x10
; ZDINX32-NEXT: lui a1, 1048560
; ZDINX32-NEXT: or a0, a0, a1
+; ZDINX32-NEXT: # kill: def $x10_w killed $x10_w killed $x10
; ZDINX32-NEXT: tail h
;
; ZDINX64-LABEL: caller_half:
; ZDINX64: # %bb.0: # %entry
+; ZDINX64-NEXT: # kill: def $x10_w killed $x10_w def $x10
; ZDINX64-NEXT: lui a1, 1048560
; ZDINX64-NEXT: or a0, a0, a1
+; ZDINX64-NEXT: # kill: def $x10_w killed $x10_w killed $x10
; ZDINX64-NEXT: tail h
entry:
%0 = tail call fastcc half @h(half %x)
@@ -60,26 +68,34 @@ define internal fastcc half @h(half %x) nounwind {
;
; ZFINX32-LABEL: h:
; ZFINX32: # %bb.0:
+; ZFINX32-NEXT: # kill: def $x10_w killed $x10_w def $x10
; ZFINX32-NEXT: lui a1, 1048560
; ZFINX32-NEXT: or a0, a0, a1
+; ZFINX32-NEXT: # kill: def $x10_w killed $x10_w killed $x10
; ZFINX32-NEXT: ret
;
; ZFINX64-LABEL: h:
; ZFINX64: # %bb.0:
+; ZFINX64-NEXT: # kill: def $x10_w killed $x10_w def $x10
; ZFINX64-NEXT: lui a1, 1048560
; ZFINX64-NEXT: or a0, a0, a1
+; ZFINX64-NEXT: # kill: def $x10_w killed $x10_w killed $x10
; ZFINX64-NEXT: ret
;
; ZDINX32-LABEL: h:
; ZDINX32: # %bb.0:
+; ZDINX32-NEXT: # kill: def $x10_w killed $x10_w def $x10
; ZDINX32-NEXT: lui a1, 1048560
; ZDINX32-NEXT: or a0, a0, a1
+; ZDINX32-NEXT: # kill: def $x10_w killed $x10_w killed $x10
; ZDINX32-NEXT: ret
;
; ZDINX64-LABEL: h:
; ZDINX64: # %bb.0:
+; ZDINX64-NEXT: # kill: def $x10_w killed $x10_w def $x10
; ZDINX64-NEXT: lui a1, 1048560
; ZDINX64-NEXT: or a0, a0, a1
+; ZDINX64-NEXT: # kill: def $x10_w killed $x10_w killed $x10
; ZDINX64-NEXT: ret
ret half %x
}
@@ -220,24 +236,28 @@ define fastcc half @callee_half_32(<32 x half> %A) nounwind {
; ZFINX32: # %bb.0:
; ZFINX32-NEXT: lui a1, 1048560
; ZFINX32-NEXT: or a0, a0, a1
+; ZFINX32-NEXT: # kill: def $x10_w killed $x10_w killed $x10
; ZFINX32-NEXT: ret
;
; ZFINX64-LABEL: callee_half_32:
; ZFINX64: # %bb.0:
; ZFINX64-NEXT: lui a1, 1048560
; ZFINX64-NEXT: or a0, a0, a1
+; ZFINX64-NEXT: # kill: def $x10_w killed $x10_w killed $x10
; ZFINX64-NEXT: ret
;
; ZDINX32-LABEL: callee_half_32:
; ZDINX32: # %bb.0:
; ZDINX32-NEXT: lui a1, 1048560
; ZDINX32-NEXT: or a0, a0, a1
+; ZDINX32-NEXT: # kill: def $x10_w killed $x10_w killed $x10
; ZDINX32-NEXT: ret
;
; ZDINX64-LABEL: callee_half_32:
; ZDINX64: # %bb.0:
; ZDINX64-NEXT: lui a1, 1048560
; ZDINX64-NEXT: or a0, a0, a1
+; ZDINX64-NEXT: # kill: def $x10_w killed $x10_w killed $x10
; ZDINX64-NEXT: ret
%B = extractelement <32 x half> %A, i32 0
ret half %B
@@ -492,8 +512,10 @@ define half @caller_half_32(<32 x half> %A) nounwind {
; ZFINX32-NEXT: lw a3, 96(sp) # 4-byte Folded Reload
; ZFINX32-NEXT: lw a4, 92(sp) # 4-byte Folded Reload
; ZFINX32-NEXT: call callee_half_32
+; ZFINX32-NEXT: # kill: def $x10_w killed $x10_w def $x10
; ZFINX32-NEXT: lui a1, 1048560
; ZFINX32-NEXT: or a0, a0, a1
+; ZFINX32-NEXT: # kill: def $x10_w killed $x10_w killed $x10
; ZFINX32-NEXT: lw ra, 156(sp) # 4-byte Folded Reload
; ZFINX32-NEXT: lw s0, 152(sp) # 4-byte Folded Reload
; ZFINX32-NEXT: lw s1, 148(sp) # 4-byte Folded Reload
@@ -588,8 +610,10 @@ define half @caller_half_32(<32 x half> %A) nounwind {
; ZFINX64-NEXT: ld a3, 176(sp) # 8-byte Folded Reload
; ZFINX64-NEXT: ld a4, 168(sp) # 8-byte Folded Reload
; ZFINX64-NEXT: call callee_half_32
+; ZFINX64-NEXT: # kill: def $x10_w killed $x10_w def $x10
; ZFINX64-NEXT: lui a1, 1048560
; ZFINX64-NEXT: or a0, a0, a1
+; ZFINX64-NEXT: # kill: def $x10_w killed $x10_w killed $x10
; ZFINX64-NEXT: ld ra, 296(sp) # 8-byte Folded Reload
; ZFINX64-NEXT: ld s0, 288(sp) # 8-byte Folded Reload
; ZFINX64-NEXT: ld s1, 280(sp) # 8-byte Folded Reload
@@ -684,8 +708,10 @@ define half @caller_half_32(<32 x half> %A) nounwind {
; ZDINX32-NEXT: lw a3, 96(sp) # 4-byte Folded Reload
; ZDINX32-NEXT: lw a4, 92(sp) # 4-byte Folded Reload
; ZDINX32-NEXT: call callee_half_32
+; ZDINX32-NEXT: # kill: def $x10_w killed $x10_w def $x10
; ZDINX32-NEXT: lui a1, 1048560
; ZDINX32-NEXT: or a0, a0, a1
+; ZDINX32-NEXT: # kill: def $x10_w killed $x10_w killed $x10
; ZDINX32-NEXT: lw ra, 156(sp) # 4-byte Folded Reload
; ZDINX32-NEXT: lw s0, 152(sp) # 4-byte Folded Reload
; ZDINX32-NEXT: lw s1, 148(sp) # 4-byte Folded Reload
@@ -780,8 +806,10 @@ define half @caller_half_32(<32 x half> %A) nounwind {
; ZDINX64-NEXT: ld a3, 176(sp) # 8-byte Folded Reload
; ZDINX64-NEXT: ld a4, 168(sp) # 8-byte Folded Reload
; ZDINX64-NEXT: call callee_half_32
+; ZDINX64-NEXT: # kill: def $x10_w killed $x10_w def $x10
; ZDINX64-NEXT: lui a1, 1048560
; ZDINX64-NEXT: or a0, a0, a1
+; ZDINX64-NEXT: # kill: def $x10_w killed $x10_w killed $x10
; ZDINX64-NEXT: ld ra, 296(sp) # 8-byte Folded Reload
; ZDINX64-NEXT: ld s0, 288(sp) # 8-byte Folded Reload
; ZDINX64-NEXT: ld s1, 280(sp) # 8-byte Folded Reload
@@ -917,79 +945,87 @@ define float @caller_float_32(<32 x float> %A) nounwind {
;
; ZHINX64-LABEL: caller_float_32:
; ZHINX64: # %bb.0:
-; ZHINX64-NEXT: addi sp, sp, -192
-; ZHINX64-NEXT: sd ra, 184(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT: sd s0, 176(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT: sd s1, 168(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT: sd s2, 160(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT: sd s3, 152(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT: sd s4, 144(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT: sd s5, 136(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT: sd s6, 128(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT: sd s7, 120(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT: sd s8, 112(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT: sd s9, 104(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT: sd s10, 96(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT: sd s11, 88(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT: addi sp, sp, -208
+; ZHINX64-NEXT: sd ra, 200(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT: sd s0, 192(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT: sd s1, 184(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT: sd s2, 176(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT: sd s3, 168(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT: sd s4, 160(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT: sd s5, 152(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT: sd s6, 144(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT: sd s7, 136(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT: sd s8, 128(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT: sd s9, 120(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT: sd s10, 112(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT: sd s11, 104(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT: lw t0, 208(sp)
+; ZHINX64-NEXT: sw t0, 100(sp) # 4-byte Folded Spill
+; ZHINX64-NEXT: lw t0, 216(sp)
+; ZHINX64-NEXT: sw t0, 96(sp) # 4-byte Folded Spill
; ZHINX64-NEXT: lw t0, 224(sp)
-; ZHINX64-NEXT: lw t1, 232(sp)
-; ZHINX64-NEXT: lw t2, 240(sp)
-; ZHINX64-NEXT: lw s0, 248(sp)
-; ZHINX64-NEXT: lw t3, 256(sp)
-; ZHINX64-NEXT: lw t4, 264(sp)
-; ZHINX64-NEXT: lw t5, 272(sp)
-; ZHINX64-NEXT: lw t6, 280(sp)
-; ZHINX64-NEXT: lw s1, 288(sp)
-; ZHINX64-NEXT: lw s2, 296(sp)
-; ZHINX64-NEXT: lw s3, 304(sp)
-; ZHINX64-NEXT: lw s4, 312(sp)
-; ZHINX64-NEXT: lw s5, 320(sp)
-; ZHINX64-NEXT: lw s6, 328(sp)
-; ZHINX64-NEXT: lw s7, 336(sp)
-; ZHINX64-NEXT: lw s8, 344(sp)
-; ZHINX64-NEXT: lw s9, 352(sp)
-; ZHINX64-NEXT: lw s10, 360(sp)
-; ZHINX64-NEXT: lw s11, 368(sp)
-; ZHINX64-NEXT: lw ra, 376(sp)
-; ZHINX64-NEXT: sw ra, 76(sp)
-; ZHINX64-NEXT: sw s11, 72(sp)
-; ZHINX64-NEXT: sw s10, 68(sp)
-; ZHINX64-NEXT: sw s9, 64(sp)
-; ZHINX64-NEXT: sw s8, 60(sp)
-; ZHINX64-NEXT: sw s7, 56(sp)
-; ZHINX64-NEXT: sw s6, 52(sp)
-; ZHINX64-NEXT: sw s5, 48(sp)
-; ZHINX64-NEXT: sw s4, 44(sp)
-; ZHINX64-NEXT: sw s3, 40(sp)
-; ZHINX64-NEXT: sw s2, 36(sp)
-; ZHINX64-NEXT: sw s1, 32(sp)
-; ZHINX64-NEXT: sw t6, 28(sp)
-; ZHINX64-NEXT: sw t5, 24(sp)
-; ZHINX64-NEXT: sw t4, 20(sp)
-; ZHINX64-NEXT: sw t3, 16(sp)
-; ZHINX64-NEXT: lw t3, 192(sp)
-; ZHINX64-NEXT: lw t4, 200(sp)
-; ZHINX64-NEXT: lw t5, 208(sp)
-; ZHINX64-NEXT: lw t6, 216(sp)
+; ZHINX64-NEXT: sw t0, 92(sp) # 4-byte Folded Spill
+; ZHINX64-NEXT: lw t0, 232(sp)
+; ZHINX64-NEXT: sw t0, 88(sp) # 4-byte Folded Spill
+; ZHINX64-NEXT: lw t6, 240(sp)
+; ZHINX64-NEXT: lw t5, 248(sp)
+; ZHINX64-NEXT: lw t4, 256(sp)
+; ZHINX64-NEXT: lw s0, 264(sp)
+; ZHINX64-NEXT: lw s1, 272(sp)
+; ZHINX64-NEXT: lw s2, 280(sp)
+; ZHINX64-NEXT: lw s3, 288(sp)
+; ZHINX64-NEXT: lw s4, 296(sp)
+; ZHINX64-NEXT: lw s5, 304(sp)
+; ZHINX64-NEXT: lw s6, 312(sp)
+; ZHINX64-NEXT: lw s7, 320(sp)
+; ZHINX64-NEXT: lw s8, 328(sp)
+; ZHINX64-NEXT: lw s9, 336(sp)
+; ZHINX64-NEXT: lw s10, 344(sp)
+; ZHINX64-NEXT: lw s11, 352(sp)
+; ZHINX64-NEXT: lw ra, 360(sp)
+; ZHINX64-NEXT: lw t3, 368(sp)
+; ZHINX64-NEXT: lw t2, 376(sp)
+; ZHINX64-NEXT: lw t1, 384(sp)
+; ZHINX64-NEXT: lw t0, 392(sp)
+; ZHINX64-NEXT: sw t0, 76(sp)
+; ZHINX64-NEXT: sw t1, 72(sp)
+; ZHINX64-NEXT: sw t2, 68(sp)
+; ZHINX64-NEXT: sw t3, 64(sp)
+; ZHINX64-NEXT: sw ra, 60(sp)
+; ZHINX64-NEXT: sw s11, 56(sp)
+; ZHINX64-NEXT: sw s10, 52(sp)
+; ZHINX64-NEXT: sw s9, 48(sp)
+; ZHINX64-NEXT: sw s8, 44(sp)
+; ZHINX64-NEXT: sw s7, 40(sp)
+; ZHINX64-NEXT: sw s6, 36(sp)
+; ZHINX64-NEXT: sw s5, 32(sp)
+; ZHINX64-NEXT: sw s4, 28(sp)
+; ZHINX64-NEXT: sw s3, 24(sp)
+; ZHINX64-NEXT: sw s2, 20(sp)
+; ZHINX64-NEXT: sw s1, 16(sp)
; ZHINX64-NEXT: sw s0, 12(sp)
-; ZHINX64-NEXT: sw t2, 8(sp)
-; ZHINX64-NEXT: sw t1, 4(sp)
-; ZHINX64-NEXT: sw t0, 0(sp)
+; ZHINX64-NEXT: sw t4, 8(sp)
+; ZHINX64-NEXT: sw t5, 4(sp)
+; ZHINX64-NEXT: sw t6, 0(sp)
+; ZHINX64-NEXT: lw t3, 100(sp) # 4-byte Folded Reload
+; ZHINX64-NEXT: lw t4, 96(sp) # 4-byte Folded Reload
+; ZHINX64-NEXT: lw t5, 92(sp) # 4-byte Folded Reload
+; ZHINX64-NEXT: lw t6, 88(sp) # 4-byte Folded Reload
; ZHINX64-NEXT: call callee_float_32
-; ZHINX64-NEXT: ld ra, 184(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT: ld s0, 176(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT: ld s1, 168(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT: ld s2, 160(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT: ld s3, 152(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT: ld s4, 144(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT: ld s5, 136(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT: ld s6, 128(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT: ld s7, 120(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT: ld s8, 112(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT: ld s9, 104(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT: ld s10, 96(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT: ld s11, 88(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT: addi sp, sp, 192
+; ZHINX64-NEXT: ld ra, 200(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT: ld s0, 192(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT: ld s1, 184(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT: ld s2, 176(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT: ld s3, 168(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT: ld s4, 160(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT: ld s5, 152(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT: ld s6, 144(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT: ld s7, 136(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT: ld s8, 128(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT: ld s9, 120(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT: ld s10, 112(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT: ld s11, 104(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT: addi sp, sp, 208
; ZHINX64-NEXT: ret
;
; ZFINX32-LABEL: caller_float_32:
@@ -1079,79 +1115,87 @@ define float @caller_float_32(<32 x float> %A) nounwind {
;
; ZFINX64-LABEL: caller_float_32:
; ZFINX64: # %bb.0:
-; ZFINX64-NEXT: addi sp, sp, -192
-; ZFINX64-NEXT: sd ra, 184(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT: sd s0, 176(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT: sd s1, 168(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT: sd s2, 160(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT: sd s3, 152(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT: sd s4, 144(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT: sd s5, 136(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT: sd s6, 128(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT: sd s7, 120(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT: sd s8, 112(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT: sd s9, 104(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT: sd s10, 96(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT: sd s11, 88(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT: addi sp, sp, -208
+; ZFINX64-NEXT: sd ra, 200(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT: sd s0, 192(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT: sd s1, 184(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT: sd s2, 176(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT: sd s3, 168(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT: sd s4, 160(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT: sd s5, 152(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT: sd s6, 144(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT: sd s7, 136(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT: sd s8, 128(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT: sd s9, 120(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT: sd s10, 112(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT: sd s11, 104(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT: lw t0, 208(sp)
+; ZFINX64-NEXT: sw t0, 100(sp) # 4-byte Folded Spill
+; ZFINX64-NEXT: lw t0, 216(sp)
+; ZFINX64-NEXT: sw t0, 96(sp) # 4-byte Folded Spill
; ZFINX64-NEXT: lw t0, 224(sp)
-; ZFINX64-NEXT: lw t1, 232(sp)
-; ZFINX64-NEXT: lw t2, 240(sp)
-; ZFINX64-NEXT: lw s0, 248(sp)
-; ZFINX64-NEXT: lw t3, 256(sp)
-; ZFINX64-NEXT: lw t4, 264(sp)
-; ZFINX64-NEXT: lw t5, 272(sp)
-; ZFINX64-NEXT: lw t6, 280(sp)
-; ZFINX64-NEXT: lw s1, 288(sp)
-; ZFINX64-NEXT: lw s2, 296(sp)
-; ZFINX64-NEXT: lw s3, 304(sp)
-; ZFINX64-NEXT: lw s4, 312(sp)
-; ZFINX64-NEXT: lw s5, 320(sp)
-; ZFINX64-NEXT: lw s6, 328(sp)
-; ZFINX64-NEXT: lw s7, 336(sp)
-; ZFINX64-NEXT: lw s8, 344(sp)
-; ZFINX64-NEXT: lw s9, 352(sp)
-; ZFINX64-NEXT: lw s10, 360(sp)
-; ZFINX64-NEXT: lw s11, 368(sp)
-; ZFINX64-NEXT: lw ra, 376(sp)
-; ZFINX64-NEXT: sw ra, 76(sp)
-; ZFINX64-NEXT: sw s11, 72(sp)
-; ZFINX64-NEXT: sw s10, 68(sp)
-; ZFINX64-NEXT: sw s9, 64(sp)
-; ZFINX64-NEXT: sw s8, 60(sp)
-; ZFINX64-NEXT: sw s7, 56(sp)
-; ZFINX64-NEXT: sw s6, 52(sp)
-; ZFINX64-NEXT: sw s5, 48(sp)
-; ZFINX64-NEXT: sw s4, 44(sp)
-; ZFINX64-NEXT: sw s3, 40(sp)
-; ZFINX64-NEXT: sw s2, 36(sp)
-; ZFINX64-NEXT: sw s1, 32(sp)
-; ZFINX64-NEXT: sw t6, 28(sp)
-; ZFINX64-NEXT: sw t5, 24(sp)
-; ZFINX64-NEXT: sw t4, 20(sp)
-; ZFINX64-NEXT: sw t3, 16(sp)
-; ZFINX64-NEXT: lw t3, 192(sp)
-; ZFINX64-NEXT: lw t4, 200(sp)
-; ZFINX64-NEXT: lw t5, 208(sp)
-; ZFINX64-NEXT: lw t6, 216(sp)
+; ZFINX64-NEXT: sw t0, 92(sp) # 4-byte Folded Spill
+; ZFINX64-NEXT: lw t0, 232(sp)
+; ZFINX64-NEXT: sw t0, 88(sp) # 4-byte Folded Spill
+; ZFINX64-NEXT: lw t6, 240(sp)
+; ZFINX64-NEXT: lw t5, 248(sp)
+; ZFINX64-NEXT: lw t4, 256(sp)
+; ZFINX64-NEXT: lw s0, 264(sp)
+; ZFINX64-NEXT: lw s1, 272(sp)
+; ZFINX64-NEXT: lw s2, 280(sp)
+; ZFINX64-NEXT: lw s3, 288(sp)
+; ZFINX64-NEXT: lw s4, 296(sp)
+; ZFINX64-NEXT: lw s5, 304(sp)
+; ZFINX64-NEXT: lw s6, 312(sp)
+; ZFINX64-NEXT: lw s7, 320(sp)
+; ZFINX64-NEXT: lw s8, 328(sp)
+; ZFINX64-NEXT: lw s9, 336(sp)
+; ZFINX64-NEXT: lw s10, 344(sp)
+; ZFINX64-NEXT: lw s11, 352(sp)
+; ZFINX64-NEXT: lw ra, 360(sp)
+; ZFINX64-NEXT: lw t3, 368(sp)
+; ZFINX64-NEXT: lw t2, 376(sp)
+; ZFINX64-NEXT: lw t1, 384(sp)
+; ZFINX64-NEXT: lw t0, 392(sp)
+; ZFINX64-NEXT: sw t0, 76(sp)
+; ZFINX64-NEXT: sw t1, 72(sp)
+; ZFINX64-NEXT: sw t2, 68(sp)
+; ZFINX64-NEXT: sw t3, 64(sp)
+; ZFINX64-NEXT: sw ra, 60(sp)
+; ZFINX64-NEXT: sw s11, 56(sp)
+; ZFINX64-NEXT: sw s10, 52(sp)
+; ZFINX64-NEXT: sw s9, 48(sp)
+; ZFINX64-NEXT: sw s8, 44(sp)
+; ZFINX64-NEXT: sw s7, 40(sp)
+; ZFINX64-NEXT: sw s6, 36(sp)
+; ZFINX64-NEXT: sw s5, 32(sp)
+; ZFINX64-NEXT: sw s4, 28(sp)
+; ZFINX64-NEXT: sw s3, 24(sp)
+; ZFINX64-NEXT: sw s2, 20(sp)
+; ZFINX64-NEXT: sw s1, 16(sp)
; ZFINX64-NEXT: sw s0, 12(sp)
-; ZFINX64-NEXT: sw t2, 8(sp)
-; ZFINX64-NEXT: sw t1, 4(sp)
-; ZFINX64-NEXT: sw t0, 0(sp)
+; ZFINX64-NEXT: sw t4, 8(sp)
+; ZFINX64-NEXT: sw t5, 4(sp)
+; ZFINX64-NEXT: sw t6, 0(sp)
+; ZFINX64-NEXT: lw t3, 100(sp) # 4-byte Folded Reload
+; ZFINX64-NEXT: lw t4, 96(sp) # 4-byte Folded Reload
+; ZFINX64-NEXT: lw t5, 92(sp) # 4-byte Folded Reload
+; ZFINX64-NEXT: lw t6, 88(sp) # 4-byte Folded Reload
; ZFINX64-NEXT: call callee_float_32
-; ZFINX64-NEXT: ld ra, 184(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT: ld s0, 176(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT: ld s1, 168(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT: ld s2, 160(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT: ld s3, 152(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT: ld s4, 144(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT: ld s5, 136(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT: ld s6, 128(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT: ld s7, 120(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT: ld s8, 112(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT: ld s9, 104(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT: ld s10, 96(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT: ld s11, 88(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT: addi sp, sp, 192
+; ZFINX64-NEXT: ld ra, 200(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT: ld s0, 192(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT: ld s1, 184(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT: ld s2, 176(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT: ld s3, 168(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT: ld s4, 160(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT: ld s5, 152(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT: ld s6, 144(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT: ld s7, 136(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT: ld s8, 128(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT: ld s9, 120(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT: ld s10, 112(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT: ld s11, 104(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT: addi sp, sp, 208
; ZFINX64-NEXT: ret
;
; ZDINX32-LABEL: caller_float_32:
@@ -1241,79 +1285,87 @@ define float @caller_float_32(<32 x float> %A) nounwind {
;
; ZDINX64-LABEL: caller_float_32:
; ZDINX64: # %bb.0:
-; ZDINX64-NEXT: addi sp, sp, -192
-; ZDINX64-NEXT: sd ra, 184(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT: sd s0, 176(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT: sd s1, 168(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT: sd s2, 160(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT: sd s3, 152(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT: sd s4, 144(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT: sd s5, 136(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT: sd s6, 128(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT: sd s7, 120(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT: sd s8, 112(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT: sd s9, 104(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT: sd s10, 96(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT: sd s11, 88(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT: addi sp, sp, -208
+; ZDINX64-NEXT: sd ra, 200(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT: sd s0, 192(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT: sd s1, 184(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT: sd s2, 176(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT: sd s3, 168(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT: sd s4, 160(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT: sd s5, 152(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT: sd s6, 144(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT: sd s7, 136(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT: sd s8, 128(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT: sd s9, 120(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT: sd s10, 112(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT: sd s11, 104(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT: lw t0, 208(sp)
+; ZDINX64-NEXT: sw t0, 100(sp) # 4-byte Folded Spill
+; ZDINX64-NEXT: lw t0, 216(sp)
+; ZDINX64-NEXT: sw t0, 96(sp) # 4-byte Folded Spill
; ZDINX64-NEXT: lw t0, 224(sp)
-; ZDINX64-NEXT: lw t1, 232(sp)
-; ZDINX64-NEXT: lw t2, 240(sp)
-; ZDINX64-NEXT: lw s0, 248(sp)
-; ZDINX64-NEXT: lw t3, 256(sp)
-; ZDINX64-NEXT: lw t4, 264(sp)
-; ZDINX64-NEXT: lw t5, 272(sp)
-; ZDINX64-NEXT: lw t6, 280(sp)
-; ZDINX64-NEXT: lw s1, 288(sp)
-; ZDINX64-NEXT: lw s2, 296(sp)
-; ZDINX64-NEXT: lw s3, 304(sp)
-; ZDINX64-NEXT: lw s4, 312(sp)
-; ZDINX64-NEXT: lw s5, 320(sp)
-; ZDINX64-NEXT: lw s6, 328(sp)
-; ZDINX64-NEXT: lw s7, 336(sp)
-; ZDINX64-NEXT: lw s8, 344(sp)
-; ZDINX64-NEXT: lw s9, 352(sp)
-; ZDINX64-NEXT: lw s10, 360(sp)
-; ZDINX64-NEXT: lw s11, 368(sp)
-; ZDINX64-NEXT: lw ra, 376(sp)
-; ZDINX64-NEXT: sw ra, 76(sp)
-; ZDINX64-NEXT: sw s11, 72(sp)
-; ZDINX64-NEXT: sw s10, 68(sp)
-; ZDINX64-NEXT: sw s9, 64(sp)
-; ZDINX64-NEXT: sw s8, 60(sp)
-; ZDINX64-NEXT: sw s7, 56(sp)
-; ZDINX64-NEXT: sw s6, 52(sp)
-; ZDINX64-NEXT: sw s5, 48(sp)
-; ZDINX64-NEXT: sw s4, 44(sp)
-; ZDINX64-NEXT: sw s3, 40(sp)
-; ZDINX64-NEXT: sw s2, 36(sp)
-; ZDINX64-NEXT: sw s1, 32(sp)
-; ZDINX64-NEXT: sw t6, 28(sp)
-; ZDINX64-NEXT: sw t5, 24(sp)
-; ZDINX64-NEXT: sw t4, 20(sp)
-; ZDINX64-NEXT: sw t3, 16(sp)
-; ZDINX64-NEXT: lw t3, 192(sp)
-; ZDINX64-NEXT: lw t4, 200(sp)
-; ZDINX64-NEXT: lw t5, 208(sp)
-; ZDINX64-NEXT: lw t6, 216(sp)
+; ZDINX64-NEXT: sw t0, 92(sp) # 4-byte Folded Spill
+; ZDINX64-NEXT: lw t0, 232(sp)
+; ZDINX64-NEXT: sw t0, 88(sp) # 4-byte Folded Spill
+; ZDINX64-NEXT: lw t6, 240(sp)
+; ZDINX64-NEXT: lw t5, 248(sp)
+; ZDINX64-NEXT: lw t4, 256(sp)
+; ZDINX64-NEXT: lw s0, 264(sp)
+; ZDINX64-NEXT: lw s1, 272(sp)
+; ZDINX64-NEXT: lw s2, 280(sp)
+; ZDINX64-NEXT: lw s3, 288(sp)
+; ZDINX64-NEXT: lw s4, 296(sp)
+; ZDINX64-NEXT: lw s5, 304(sp)
+; ZDINX64-NEXT: lw s6, 312(sp)
+; ZDINX64-NEXT: lw s7, 320(sp)
+; ZDINX64-NEXT: lw s8, 328(sp)
+; ZDINX64-NEXT: lw s9, 336(sp)
+; ZDINX64-NEXT: lw s10, 344(sp)
+; ZDINX64-NEXT: lw s11, 352(sp)
+; ZDINX64-NEXT: lw ra, 360(sp)
+; ZDINX64-NEXT: lw t3, 368(sp)
+; ZDINX64-NEXT: lw t2, 376(sp)
+; ZDINX64-NEXT: lw t1, 384(sp)
+; ZDINX64-NEXT: lw t0, 392(sp)
+; ZDINX64-NEXT: sw t0, 76(sp)
+; ZDINX64-NEXT: sw t1, 72(sp)
+; ZDINX64-NEXT: sw t2, 68(sp)
+; ZDINX64-NEXT: sw t3, 64(sp)
+; ZDINX64-NEXT: sw ra, 60(sp)
+; ZDINX64-NEXT: sw s11, 56(sp)
+; ZDINX64-NEXT: sw s10, 52(sp)
+; ZDINX64-NEXT: sw s9, 48(sp)
+; ZDINX64-NEXT: sw s8, 44(sp)
+; ZDINX64-NEXT: sw s7, 40(sp)
+; ZDINX64-NEXT: sw s6, 36(sp)
+; ZDINX64-NEXT: sw s5, 32(sp)
+; ZDINX64-NEXT: sw s4, 28(sp)
+; ZDINX64-NEXT: sw s3, 24(sp)
+; ZDINX64-NEXT: sw s2, 20(sp)
+; ZDINX64-NEXT: sw s1, 16(sp)
; ZDINX64-NEXT: sw s0, 12(sp)
-; ZDINX64-NEXT: sw t2, 8(sp)
-; ZDINX64-NEXT: sw t1, 4(sp)
-; ZDINX64-NEXT: sw t0, 0(sp)
+; ZDINX64-NEXT: sw t4, 8(sp)
+; ZDINX64-NEXT: sw t5, 4(sp)
+; ZDINX64-NEXT: sw t6, 0(sp)
+; ZDINX64-NEXT: lw t3, 100(sp) # 4-byte Folded Reload
+; ZDINX64-NEXT: lw t4, 96(sp) # 4-byte Folded Reload
+; ZDINX64-NEXT: lw t5, 92(sp) # 4-byte Folded Reload
+; ZDINX64-NEXT: lw t6, 88(sp) # 4-byte Folded Reload
; ZDINX64-NEXT: call callee_float_32
-; ZDINX64-NEXT: ld ra, 184(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT: ld s0, 176(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT: ld s1, 168(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT: ld s2, 160(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT: ld s3, 152(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT: ld s4, 144(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT: ld s5, 136(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT: ld s6, 128(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT: ld s7, 120(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT: ld s8, 112(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT: ld s9, 104(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT: ld s10, 96(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT: ld s11, 88(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT: addi sp, sp, 192
+; ZDINX64-NEXT: ld ra, 200(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT: ld s0, 192(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT: ld s1, 184(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT: ld s2, 176(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT: ld s3, 168(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT: ld s4, 160(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT: ld s5, 152(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT: ld s6, 144(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT: ld s7, 136(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT: ld s8, 128(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT: ld s9, 120(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT: ld s10, 112(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT: ld s11, 104(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT: addi sp, sp, 208
; ZDINX64-NEXT: ret
%C = call fastcc float @callee_float_32(<32 x float> %A)
ret float %C
diff --git a/llvm/test/CodeGen/RISCV/float-arith.ll b/llvm/test/CodeGen/RISCV/float-arith.ll
index 3f32734db0ba71..bf500d1a2adb39 100644
--- a/llvm/test/CodeGen/RISCV/float-arith.ll
+++ b/llvm/test/CodeGen/RISCV/float-arith.ll
@@ -706,18 +706,11 @@ define float @fnmadd_s_3(float %a, float %b, float %c) nounwind {
; CHECKIF-NEXT: fneg.s fa0, fa5
; CHECKIF-NEXT: ret
;
-; RV32IZFINX-LABEL: fnmadd_s_3:
-; RV32IZFINX: # %bb.0:
-; RV32IZFINX-NEXT: fmadd.s a0, a0, a1, a2
-; RV32IZFINX-NEXT: fneg.s a0, a0
-; RV32IZFINX-NEXT: ret
-;
-; RV64IZFINX-LABEL: fnmadd_s_3:
-; RV64IZFINX: # %bb.0:
-; RV64IZFINX-NEXT: fmadd.s a0, a0, a1, a2
-; RV64IZFINX-NEXT: lui a1, 524288
-; RV64IZFINX-NEXT: xor a0, a0, a1
-; RV64IZFINX-NEXT: ret
+; CHECKIZFINX-LABEL: fnmadd_s_3:
+; CHECKIZFINX: # %bb.0:
+; CHECKIZFINX-NEXT: fmadd.s a0, a0, a1, a2
+; CHECKIZFINX-NEXT: fneg.s a0, a0
+; CHECKIZFINX-NEXT: ret
;
; RV32I-LABEL: fnmadd_s_3:
; RV32I: # %bb.0:
@@ -761,17 +754,10 @@ define float @fnmadd_nsz(float %a, float %b, float %c) nounwind {
; CHECKIF-NEXT: fnmadd.s fa0, fa0, fa1, fa2
; CHECKIF-NEXT: ret
;
-; RV32IZFINX-LABEL: fnmadd_nsz:
-; RV32IZFINX: # %bb.0:
-; RV32IZFINX-NEXT: fnmadd.s a0, a0, a1, a2
-; RV32IZFINX-NEXT: ret
-;
-; RV64IZFINX-LABEL: fnmadd_nsz:
-; RV64IZFINX: # %bb.0:
-; RV64IZFINX-NEXT: fmadd.s a0, a0, a1, a2
-; RV64IZFINX-NEXT: lui a1, 524288
-; RV64IZFINX-NEXT: xor a0, a0, a1
-; RV64IZFINX-NEXT: ret
+; CHECKIZFINX-LABEL: fnmadd_nsz:
+; CHECKIZFINX: # %bb.0:
+; CHECKIZFINX-NEXT: fnmadd.s a0, a0, a1, a2
+; CHECKIZFINX-NEXT: ret
;
; RV32I-LABEL: fnmadd_nsz:
; RV32I: # %bb.0:
@@ -1247,3 +1233,6 @@ define float @fsgnjx_f32(float %x, float %y) nounwind {
%mul = fmul float %z, %y
ret float %mul
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; RV32IZFINX: {{.*}}
+; RV64IZFINX: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/float-bitmanip-dagcombines.ll b/llvm/test/CodeGen/RISCV/float-bitmanip-dagcombines.ll
index 2338219687ef75..86f6f079243c26 100644
--- a/llvm/test/CodeGen/RISCV/float-bitmanip-dagcombines.ll
+++ b/llvm/test/CodeGen/RISCV/float-bitmanip-dagcombines.ll
@@ -50,8 +50,7 @@ define float @fneg(float %a) nounwind {
;
; RV64IZFINX-LABEL: fneg:
; RV64IZFINX: # %bb.0:
-; RV64IZFINX-NEXT: lui a1, 524288
-; RV64IZFINX-NEXT: xor a0, a0, a1
+; RV64IZFINX-NEXT: fneg.s a0, a0
; RV64IZFINX-NEXT: ret
%1 = fneg float %a
ret float %1
@@ -91,8 +90,7 @@ define float @fabs(float %a) nounwind {
;
; RV64IZFINX-LABEL: fabs:
; RV64IZFINX: # %bb.0:
-; RV64IZFINX-NEXT: slli a0, a0, 33
-; RV64IZFINX-NEXT: srli a0, a0, 33
+; RV64IZFINX-NEXT: fabs.s a0, a0
; RV64IZFINX-NEXT: ret
%1 = call float @llvm.fabs.f32(float %a)
ret float %1
diff --git a/llvm/test/CodeGen/RISCV/float-frem.ll b/llvm/test/CodeGen/RISCV/float-frem.ll
index 651b1b116adc76..31d39a5ab6d6ea 100644
--- a/llvm/test/CodeGen/RISCV/float-frem.ll
+++ b/llvm/test/CodeGen/RISCV/float-frem.ll
@@ -27,12 +27,7 @@ define float @frem_f32(float %a, float %b) nounwind {
;
; RV64IZFINX-LABEL: frem_f32:
; RV64IZFINX: # %bb.0:
-; RV64IZFINX-NEXT: addi sp, sp, -16
-; RV64IZFINX-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64IZFINX-NEXT: call fmodf
-; RV64IZFINX-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64IZFINX-NEXT: addi sp, sp, 16
-; RV64IZFINX-NEXT: ret
+; RV64IZFINX-NEXT: tail fmodf
;
; RV32I-LABEL: frem_f32:
; RV32I: # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/float-imm.ll b/llvm/test/CodeGen/RISCV/float-imm.ll
index 69a506cd850f2c..58cbc72e2197c9 100644
--- a/llvm/test/CodeGen/RISCV/float-imm.ll
+++ b/llvm/test/CodeGen/RISCV/float-imm.ll
@@ -20,12 +20,14 @@ define float @float_imm() nounwind {
; RV32ZFINX: # %bb.0:
; RV32ZFINX-NEXT: lui a0, 263313
; RV32ZFINX-NEXT: addi a0, a0, -37
+; RV32ZFINX-NEXT: # kill: def $x10_w killed $x10_w killed $x10
; RV32ZFINX-NEXT: ret
;
; RV64ZFINX-LABEL: float_imm:
; RV64ZFINX: # %bb.0:
; RV64ZFINX-NEXT: lui a0, 263313
; RV64ZFINX-NEXT: addiw a0, a0, -37
+; RV64ZFINX-NEXT: # kill: def $x10_w killed $x10_w killed $x10
; RV64ZFINX-NEXT: ret
ret float 3.14159274101257324218750
}
diff --git a/llvm/test/CodeGen/RISCV/float-intrinsics.ll b/llvm/test/CodeGen/RISCV/float-intrinsics.ll
index 52442026dab502..b05eac9c9dee26 100644
--- a/llvm/test/CodeGen/RISCV/float-intrinsics.ll
+++ b/llvm/test/CodeGen/RISCV/float-intrinsics.ll
@@ -136,12 +136,7 @@ define float @sin_f32(float %a) nounwind {
;
; RV64IZFINX-LABEL: sin_f32:
; RV64IZFINX: # %bb.0:
-; RV64IZFINX-NEXT: addi sp, sp, -16
-; RV64IZFINX-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64IZFINX-NEXT: call sinf
-; RV64IZFINX-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64IZFINX-NEXT: addi sp, sp, 16
-; RV64IZFINX-NEXT: ret
+; RV64IZFINX-NEXT: tail sinf
;
; RV32I-LABEL: sin_f32:
; RV32I: # %bb.0:
@@ -181,12 +176,7 @@ define float @cos_f32(float %a) nounwind {
;
; RV64IZFINX-LABEL: cos_f32:
; RV64IZFINX: # %bb.0:
-; RV64IZFINX-NEXT: addi sp, sp, -16
-; RV64IZFINX-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64IZFINX-NEXT: call cosf
-; RV64IZFINX-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64IZFINX-NEXT: addi sp, sp, 16
-; RV64IZFINX-NEXT: ret
+; RV64IZFINX-NEXT: tail cosf
;
; RV32I-LABEL: cos_f32:
; RV32I: # %bb.0:
@@ -327,12 +317,7 @@ define float @pow_f32(float %a, float %b) nounwind {
;
; RV64IZFINX-LABEL: pow_f32:
; RV64IZFINX: # %bb.0:
-; RV64IZFINX-NEXT: addi sp, sp, -16
-; RV64IZFINX-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64IZFINX-NEXT: call powf
-; RV64IZFINX-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64IZFINX-NEXT: addi sp, sp, 16
-; RV64IZFINX-NEXT: ret
+; RV64IZFINX-NEXT: tail powf
;
; RV32I-LABEL: pow_f32:
; RV32I: # %bb.0:
@@ -372,12 +357,7 @@ define float @exp_f32(float %a) nounwind {
;
; RV64IZFINX-LABEL: exp_f32:
; RV64IZFINX: # %bb.0:
-; RV64IZFINX-NEXT: addi sp, sp, -16
-; RV64IZFINX-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64IZFINX-NEXT: call expf
-; RV64IZFINX-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64IZFINX-NEXT: addi sp, sp, 16
-; RV64IZFINX-NEXT: ret
+; RV64IZFINX-NEXT: tail expf
;
; RV32I-LABEL: exp_f32:
; RV32I: # %bb.0:
@@ -417,12 +397,7 @@ define float @exp2_f32(float %a) nounwind {
;
; RV64IZFINX-LABEL: exp2_f32:
; RV64IZFINX: # %bb.0:
-; RV64IZFINX-NEXT: addi sp, sp, -16
-; RV64IZFINX-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64IZFINX-NEXT: call exp2f
-; RV64IZFINX-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64IZFINX-NEXT: addi sp, sp, 16
-; RV64IZFINX-NEXT: ret
+; RV64IZFINX-NEXT: tail exp2f
;
; RV32I-LABEL: exp2_f32:
; RV32I: # %bb.0:
@@ -462,12 +437,7 @@ define float @log_f32(float %a) nounwind {
;
; RV64IZFINX-LABEL: log_f32:
; RV64IZFINX: # %bb.0:
-; RV64IZFINX-NEXT: addi sp, sp, -16
-; RV64IZFINX-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64IZFINX-NEXT: call logf
-; RV64IZFINX-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64IZFINX-NEXT: addi sp, sp, 16
-; RV64IZFINX-NEXT: ret
+; RV64IZFINX-NEXT: tail logf
;
; RV32I-LABEL: log_f32:
; RV32I: # %bb.0:
@@ -507,12 +477,7 @@ define float @log10_f32(float %a) nounwind {
;
; RV64IZFINX-LABEL: log10_f32:
; RV64IZFINX: # %bb.0:
-; RV64IZFINX-NEXT: addi sp, sp, -16
-; RV64IZFINX-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64IZFINX-NEXT: call log10f
-; RV64IZFINX-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64IZFINX-NEXT: addi sp, sp, 16
-; RV64IZFINX-NEXT: ret
+; RV64IZFINX-NEXT: tail log10f
;
; RV32I-LABEL: log10_f32:
; RV32I: # %bb.0:
@@ -552,12 +517,7 @@ define float @log2_f32(float %a) nounwind {
;
; RV64IZFINX-LABEL: log2_f32:
; RV64IZFINX: # %bb.0:
-; RV64IZFINX-NEXT: addi sp, sp, -16
-; RV64IZFINX-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64IZFINX-NEXT: call log2f
-; RV64IZFINX-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64IZFINX-NEXT: addi sp, sp, 16
-; RV64IZFINX-NEXT: ret
+; RV64IZFINX-NEXT: tail log2f
;
; RV32I-LABEL: log2_f32:
; RV32I: # %bb.0:
@@ -698,8 +658,7 @@ define float @fabs_f32(float %a) nounwind {
;
; RV64IZFINX-LABEL: fabs_f32:
; RV64IZFINX: # %bb.0:
-; RV64IZFINX-NEXT: slli a0, a0, 33
-; RV64IZFINX-NEXT: srli a0, a0, 33
+; RV64IZFINX-NEXT: fabs.s a0, a0
; RV64IZFINX-NEXT: ret
;
; RV32I-LABEL: fabs_f32:
@@ -1195,12 +1154,7 @@ define float @nearbyint_f32(float %a) nounwind {
;
; RV64IZFINX-LABEL: nearbyint_f32:
; RV64IZFINX: # %bb.0:
-; RV64IZFINX-NEXT: addi sp, sp, -16
-; RV64IZFINX-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64IZFINX-NEXT: call nearbyintf
-; RV64IZFINX-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64IZFINX-NEXT: addi sp, sp, 16
-; RV64IZFINX-NEXT: ret
+; RV64IZFINX-NEXT: tail nearbyintf
;
; RV32I-LABEL: nearbyint_f32:
; RV32I: # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/half-intrinsics.ll b/llvm/test/CodeGen/RISCV/half-intrinsics.ll
index 81e29329e71817..18cdb18106f343 100644
--- a/llvm/test/CodeGen/RISCV/half-intrinsics.ll
+++ b/llvm/test/CodeGen/RISCV/half-intrinsics.ll
@@ -153,8 +153,8 @@ define half @powi_f16(half %a, i32 %b) nounwind {
; RV64IZHINX: # %bb.0:
; RV64IZHINX-NEXT: addi sp, sp, -16
; RV64IZHINX-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64IZHINX-NEXT: sext.w a1, a1
; RV64IZHINX-NEXT: fcvt.s.h a0, a0
+; RV64IZHINX-NEXT: sext.w a1, a1
; RV64IZHINX-NEXT: call __powisf2
; RV64IZHINX-NEXT: fcvt.h.s a0, a0
; RV64IZHINX-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
@@ -233,8 +233,8 @@ define half @powi_f16(half %a, i32 %b) nounwind {
; RV64IZHINXMIN: # %bb.0:
; RV64IZHINXMIN-NEXT: addi sp, sp, -16
; RV64IZHINXMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64IZHINXMIN-NEXT: sext.w a1, a1
; RV64IZHINXMIN-NEXT: fcvt.s.h a0, a0
+; RV64IZHINXMIN-NEXT: sext.w a1, a1
; RV64IZHINXMIN-NEXT: call __powisf2
; RV64IZHINXMIN-NEXT: fcvt.h.s a0, a0
; RV64IZHINXMIN-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/kcfi-mir.ll b/llvm/test/CodeGen/RISCV/kcfi-mir.ll
index e478930d59abc5..f35be0564cb25f 100644
--- a/llvm/test/CodeGen/RISCV/kcfi-mir.ll
+++ b/llvm/test/CodeGen/RISCV/kcfi-mir.ll
@@ -10,7 +10,7 @@ define void @f1(ptr noundef %x) !kcfi_type !1 {
; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 16
; CHECK-NEXT: SD killed $x1, $x2, 8 :: (store (s64) into %stack.0)
; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $x1, -8
- ; CHECK-NEXT: BUNDLE implicit-def $x6, implicit-def $x6_h, implicit-def $x7, implicit-def $x7_h, implicit-def $x28, implicit-def $x28_h, implicit-def $x29, implicit-def $x29_h, implicit-def $x30, implicit-def $x30_h, implicit-def $x31, implicit-def $x31_h, implicit-def dead $x1, implicit-def $x2, implicit-def $x2_h, implicit killed $x10 {
+ ; CHECK-NEXT: BUNDLE implicit-def $x6, implicit-def $x6_w, implicit-def $x6_h, implicit-def $x7, implicit-def $x7_w, implicit-def $x7_h, implicit-def $x28, implicit-def $x28_w, implicit-def $x28_h, implicit-def $x29, implicit-def $x29_w, implicit-def $x29_h, implicit-def $x30, implicit-def $x30_w, implicit-def $x30_h, implicit-def $x31, implicit-def $x31_w, implicit-def $x31_h, implicit-def dead $x1, implicit-def $x2, implicit-def $x2_w, implicit-def $x2_h, implicit killed $x10 {
; CHECK-NEXT: KCFI_CHECK $x10, 12345678, implicit-def $x6, implicit-def $x7, implicit-def $x28, implicit-def $x29, implicit-def $x30, implicit-def $x31
; CHECK-NEXT: PseudoCALLIndirect killed $x10, csr_ilp32_lp64, implicit-def dead $x1, implicit-def $x2
; CHECK-NEXT: }
@@ -26,7 +26,7 @@ define void @f2(ptr noundef %x) #0 {
; CHECK: bb.0 (%ir-block.0):
; CHECK-NEXT: liveins: $x10
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: BUNDLE implicit-def $x6, implicit-def $x6_h, implicit-def $x7, implicit-def $x7_h, implicit-def $x28, implicit-def $x28_h, implicit-def $x29, implicit-def $x29_h, implicit-def $x30, implicit-def $x30_h, implicit-def $x31, implicit-def $x31_h, implicit killed $x10, implicit $x2 {
+ ; CHECK-NEXT: BUNDLE implicit-def $x6, implicit-def $x6_w, implicit-def $x6_h, implicit-def $x7, implicit-def $x7_w, implicit-def $x7_h, implicit-def $x28, implicit-def $x28_w, implicit-def $x28_h, implicit-def $x29, implicit-def $x29_w, implicit-def $x29_h, implicit-def $x30, implicit-def $x30_w, implicit-def $x30_h, implicit-def $x31, implicit-def $x31_w, implicit-def $x31_h, implicit killed $x10, implicit $x2 {
; CHECK-NEXT: KCFI_CHECK $x10, 12345678, implicit-def $x6, implicit-def $x7, implicit-def $x28, implicit-def $x29, implicit-def $x30, implicit-def $x31
; CHECK-NEXT: PseudoTAILIndirect killed $x10, implicit $x2
; CHECK-NEXT: }
diff --git a/llvm/test/CodeGen/RISCV/llvm.frexp.ll b/llvm/test/CodeGen/RISCV/llvm.frexp.ll
index 30f9dd1e516585..557bca0b73c8a6 100644
--- a/llvm/test/CodeGen/RISCV/llvm.frexp.ll
+++ b/llvm/test/CodeGen/RISCV/llvm.frexp.ll
@@ -62,8 +62,10 @@ define { half, i32 } @test_frexp_f16_i32(half %a) nounwind {
; RV32IZFINXZDINX-NEXT: call frexpf
; RV32IZFINXZDINX-NEXT: call __truncsfhf2
; RV32IZFINXZDINX-NEXT: lw a1, 8(sp)
+; RV32IZFINXZDINX-NEXT: # kill: def $x10_w killed $x10_w def $x10
; RV32IZFINXZDINX-NEXT: lui a2, 1048560
; RV32IZFINXZDINX-NEXT: or a0, a0, a2
+; RV32IZFINXZDINX-NEXT: # kill: def $x10_w killed $x10_w killed $x10
; RV32IZFINXZDINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: addi sp, sp, 16
; RV32IZFINXZDINX-NEXT: ret
@@ -77,8 +79,10 @@ define { half, i32 } @test_frexp_f16_i32(half %a) nounwind {
; RV64IZFINXZDINX-NEXT: call frexpf
; RV64IZFINXZDINX-NEXT: call __truncsfhf2
; RV64IZFINXZDINX-NEXT: ld a1, 0(sp)
+; RV64IZFINXZDINX-NEXT: # kill: def $x10_w killed $x10_w def $x10
; RV64IZFINXZDINX-NEXT: lui a2, 1048560
; RV64IZFINXZDINX-NEXT: or a0, a0, a2
+; RV64IZFINXZDINX-NEXT: # kill: def $x10_w killed $x10_w killed $x10
; RV64IZFINXZDINX-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64IZFINXZDINX-NEXT: addi sp, sp, 16
; RV64IZFINXZDINX-NEXT: ret
@@ -157,8 +161,10 @@ define half @test_frexp_f16_i32_only_use_fract(half %a) nounwind {
; RV32IZFINXZDINX-NEXT: addi a1, sp, 8
; RV32IZFINXZDINX-NEXT: call frexpf
; RV32IZFINXZDINX-NEXT: call __truncsfhf2
+; RV32IZFINXZDINX-NEXT: # kill: def $x10_w killed $x10_w def $x10
; RV32IZFINXZDINX-NEXT: lui a1, 1048560
; RV32IZFINXZDINX-NEXT: or a0, a0, a1
+; RV32IZFINXZDINX-NEXT: # kill: def $x10_w killed $x10_w killed $x10
; RV32IZFINXZDINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: addi sp, sp, 16
; RV32IZFINXZDINX-NEXT: ret
@@ -171,8 +177,10 @@ define half @test_frexp_f16_i32_only_use_fract(half %a) nounwind {
; RV64IZFINXZDINX-NEXT: mv a1, sp
; RV64IZFINXZDINX-NEXT: call frexpf
; RV64IZFINXZDINX-NEXT: call __truncsfhf2
+; RV64IZFINXZDINX-NEXT: # kill: def $x10_w killed $x10_w def $x10
; RV64IZFINXZDINX-NEXT: lui a1, 1048560
; RV64IZFINXZDINX-NEXT: or a0, a0, a1
+; RV64IZFINXZDINX-NEXT: # kill: def $x10_w killed $x10_w killed $x10
; RV64IZFINXZDINX-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64IZFINXZDINX-NEXT: addi sp, sp, 16
; RV64IZFINXZDINX-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/make-compressible-zfinx.mir b/llvm/test/CodeGen/RISCV/make-compressible-zfinx.mir
new file mode 100644
index 00000000000000..d0223dc5911ad3
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/make-compressible-zfinx.mir
@@ -0,0 +1,296 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -o - %s -mtriple=riscv32 -mattr=+c,+zfinx -simplify-mir \
+# RUN: -run-pass=riscv-make-compressible | FileCheck --check-prefixes=CHECK %s
+# RUN: llc -o - %s -mtriple=riscv64 -mattr=+c,+zfinx -simplify-mir \
+# RUN: -run-pass=riscv-make-compressible | FileCheck --check-prefixes=CHECK %s
+
+--- |
+
+ define void @store_common_value_float(ptr %a, ptr %b, ptr %c, float %d, float %e, float %f, float %g, float %h, float %i, float %j) #0 {
+ entry:
+ store float %j, ptr %a, align 4
+ store float %j, ptr %b, align 4
+ store float %j, ptr %c, align 4
+ ret void
+ }
+
+ define void @store_common_ptr_float(float %a, float %b, float %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, ptr %p) #0 {
+ entry:
+ store volatile float %a, ptr %p, align 4
+ store volatile float %b, ptr %p, align 4
+ store volatile float %c, ptr %p, align 4
+ ret void
+ }
+
+ define void @load_common_ptr_float(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, ptr %g) #0 {
+ entry:
+ %0 = load float, ptr %g, align 4
+ %arrayidx1 = getelementptr inbounds float, ptr %g, i32 1
+ %1 = load float, ptr %arrayidx1, align 4
+ %arrayidx2 = getelementptr inbounds float, ptr %g, i32 2
+ %2 = load float, ptr %arrayidx2, align 4
+ tail call void @load_common_ptr_float_1(float %0, float %1, float %2)
+ ret void
+ }
+
+ declare void @load_common_ptr_float_1(float, float, float) #0
+
+ define void @store_large_offset_float(ptr %p, float %a, float %b, float %c, float %d) #0 {
+ entry:
+ %0 = getelementptr inbounds float, ptr %p, i32 100
+ store volatile float %a, ptr %0, align 4
+ %1 = getelementptr inbounds float, ptr %p, i32 101
+ store volatile float %b, ptr %1, align 4
+ %2 = getelementptr inbounds float, ptr %p, i32 102
+ store volatile float %c, ptr %2, align 4
+ %3 = getelementptr inbounds float, ptr %p, i32 103
+ store volatile float %d, ptr %3, align 4
+ ret void
+ }
+
+ define void @load_large_offset_float(ptr %p) #0 {
+ entry:
+ %arrayidx = getelementptr inbounds float, ptr %p, i32 100
+ %0 = load float, ptr %arrayidx, align 4
+ %arrayidx1 = getelementptr inbounds float, ptr %p, i32 101
+ %1 = load float, ptr %arrayidx1, align 4
+ %arrayidx2 = getelementptr inbounds float, ptr %p, i32 102
+ %2 = load float, ptr %arrayidx2, align 4
+ tail call void @load_large_offset_float_1(float %0, float %1, float %2)
+ ret void
+ }
+
+ declare void @load_large_offset_float_1(float, float, float) #0
+
+ define void @store_common_value_float_no_opt(ptr %a, float %b, float %c, float %d, float %e, float %f, float %g, float %h) #0 {
+ entry:
+ store float %h, ptr %a, align 4
+ ret void
+ }
+
+ define void @store_common_ptr_float_no_opt(float %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, ptr %p) #0 {
+ entry:
+ store volatile float %a, ptr %p, align 4
+ ret void
+ }
+
+ define float @load_common_ptr_float_no_opt(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, ptr %g) #0 {
+ entry:
+ %0 = load float, ptr %g, align 4
+ ret float %0
+ }
+
+ define void @store_large_offset_float_no_opt(ptr %p, float %a, float %b) #0 {
+ entry:
+ %0 = getelementptr inbounds float, ptr %p, i32 100
+ store volatile float %a, ptr %0, align 4
+ %1 = getelementptr inbounds float, ptr %p, i32 101
+ store volatile float %b, ptr %1, align 4
+ ret void
+ }
+
+ define { float, float } @load_large_offset_float_no_opt(ptr %p) #0 {
+ entry:
+ %arrayidx = getelementptr inbounds float, ptr %p, i32 100
+ %0 = load float, ptr %arrayidx, align 4
+ %arrayidx1 = getelementptr inbounds float, ptr %p, i32 101
+ %1 = load float, ptr %arrayidx1, align 4
+ %2 = insertvalue { float, float } undef, float %0, 0
+ %3 = insertvalue { float, float } %2, float %1, 1
+ ret { float, float } %3
+ }
+
+ attributes #0 = { minsize }
+
+...
+---
+name: store_common_value_float
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11, $x12
+
+ ; CHECK-LABEL: name: store_common_value_float
+ ; CHECK: liveins: $x10, $x11, $x12
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $x13_w = PseudoMV_FPR32INX $x0_w
+ ; CHECK-NEXT: SW_INX $x13_w, killed renamable $x10, 0 :: (store (s32) into %ir.a)
+ ; CHECK-NEXT: SW_INX $x13_w, killed renamable $x11, 0 :: (store (s32) into %ir.b)
+ ; CHECK-NEXT: SW_INX killed $x13_w, killed renamable $x12, 0 :: (store (s32) into %ir.c)
+ ; CHECK-NEXT: PseudoRET
+ SW_INX $x0_w, killed renamable $x10, 0 :: (store (s32) into %ir.a)
+ SW_INX $x0_w, killed renamable $x11, 0 :: (store (s32) into %ir.b)
+ SW_INX killed $x0_w, killed renamable $x12, 0 :: (store (s32) into %ir.c)
+ PseudoRET
+
+...
+---
+name: store_common_ptr_float
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10_w, $x11_w, $x12_w, $x16
+
+ ; CHECK-LABEL: name: store_common_ptr_float
+ ; CHECK: liveins: $x10_w, $x11_w, $x12_w, $x16
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $x13 = ADDI $x16, 0
+ ; CHECK-NEXT: SW_INX killed renamable $x10_w, $x13, 0 :: (volatile store (s32) into %ir.p)
+ ; CHECK-NEXT: SW_INX killed renamable $x11_w, $x13, 0 :: (volatile store (s32) into %ir.p)
+ ; CHECK-NEXT: SW_INX killed renamable $x12_w, killed $x13, 0 :: (volatile store (s32) into %ir.p)
+ ; CHECK-NEXT: PseudoRET
+ SW_INX killed renamable $x10_w, renamable $x16, 0 :: (volatile store (s32) into %ir.p)
+ SW_INX killed renamable $x11_w, renamable $x16, 0 :: (volatile store (s32) into %ir.p)
+ SW_INX killed renamable $x12_w, killed renamable $x16, 0 :: (volatile store (s32) into %ir.p)
+ PseudoRET
+
+...
+---
+name: load_common_ptr_float
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x16
+
+ ; CHECK-LABEL: name: load_common_ptr_float
+ ; CHECK: liveins: $x16
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $x13 = ADDI $x16, 0
+ ; CHECK-NEXT: renamable $x10_w = LW_INX $x13, 0 :: (load (s32) from %ir.g)
+ ; CHECK-NEXT: renamable $x11_w = LW_INX $x13, 4 :: (load (s32) from %ir.arrayidx1)
+ ; CHECK-NEXT: renamable $x12_w = LW_INX killed $x13, 8 :: (load (s32) from %ir.arrayidx2)
+ ; CHECK-NEXT: PseudoTAIL target-flags(riscv-call) @load_common_ptr_float_1, implicit $x2, implicit $x10_w, implicit $x11_w, implicit $x12_w
+ renamable $x10_w = LW_INX renamable $x16, 0 :: (load (s32) from %ir.g)
+ renamable $x11_w = LW_INX renamable $x16, 4 :: (load (s32) from %ir.arrayidx1)
+ renamable $x12_w = LW_INX killed renamable $x16, 8 :: (load (s32) from %ir.arrayidx2)
+ PseudoTAIL target-flags(riscv-call) @load_common_ptr_float_1, implicit $x2, implicit $x10_w, implicit $x11_w, implicit $x12_w
+
+...
+---
+name: store_large_offset_float
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x10_w, $x11_w, $x12_w, $x13_w
+
+ ; CHECK-LABEL: name: store_large_offset_float
+ ; CHECK: liveins: $x10, $x10_w, $x11_w, $x12_w, $x13_w
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $x14 = ADDI $x10, 384
+ ; CHECK-NEXT: SW_INX killed renamable $x10_w, $x14, 16 :: (volatile store (s32) into %ir.0)
+ ; CHECK-NEXT: SW_INX killed renamable $x11_w, $x14, 20 :: (volatile store (s32) into %ir.1)
+ ; CHECK-NEXT: SW_INX killed renamable $x12_w, $x14, 24 :: (volatile store (s32) into %ir.2)
+ ; CHECK-NEXT: SW_INX killed renamable $x13_w, killed $x14, 28 :: (volatile store (s32) into %ir.3)
+ ; CHECK-NEXT: PseudoRET
+ SW_INX killed renamable $x10_w, renamable $x10, 400 :: (volatile store (s32) into %ir.0)
+ SW_INX killed renamable $x11_w, renamable $x10, 404 :: (volatile store (s32) into %ir.1)
+ SW_INX killed renamable $x12_w, renamable $x10, 408 :: (volatile store (s32) into %ir.2)
+ SW_INX killed renamable $x13_w, killed renamable $x10, 412 :: (volatile store (s32) into %ir.3)
+ PseudoRET
+
+...
+---
+name: load_large_offset_float
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10
+
+ ; CHECK-LABEL: name: load_large_offset_float
+ ; CHECK: liveins: $x10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $x14 = ADDI $x10, 384
+ ; CHECK-NEXT: renamable $x11_w = LW_INX $x14, 16 :: (load (s32) from %ir.arrayidx)
+ ; CHECK-NEXT: renamable $x12_w = LW_INX $x14, 20 :: (load (s32) from %ir.arrayidx1)
+ ; CHECK-NEXT: renamable $x13_w = LW_INX killed $x14, 24 :: (load (s32) from %ir.arrayidx2)
+ ; CHECK-NEXT: PseudoTAIL target-flags(riscv-call) @load_large_offset_float_1, implicit $x2, implicit $x11_w, implicit $x12_w, implicit $x13_w
+ renamable $x11_w = LW_INX renamable $x10, 400 :: (load (s32) from %ir.arrayidx)
+ renamable $x12_w = LW_INX renamable $x10, 404 :: (load (s32) from %ir.arrayidx1)
+ renamable $x13_w = LW_INX killed renamable $x10, 408 :: (load (s32) from %ir.arrayidx2)
+ PseudoTAIL target-flags(riscv-call) @load_large_offset_float_1, implicit $x2, implicit $x11_w, implicit $x12_w, implicit $x13_w
+
+...
+---
+name: store_common_value_float_no_opt
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x16_w
+
+ ; CHECK-LABEL: name: store_common_value_float_no_opt
+ ; CHECK: liveins: $x10, $x16_w
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: SW_INX killed renamable $x16_w, killed renamable $x10, 0 :: (store (s32) into %ir.a)
+ ; CHECK-NEXT: PseudoRET
+ SW_INX killed renamable $x16_w, killed renamable $x10, 0 :: (store (s32) into %ir.a)
+ PseudoRET
+
+...
+---
+name: store_common_ptr_float_no_opt
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x16, $x10_w
+
+ ; CHECK-LABEL: name: store_common_ptr_float_no_opt
+ ; CHECK: liveins: $x16, $x10_w
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: SW_INX killed renamable $x10_w, killed renamable $x16, 0 :: (volatile store (s32) into %ir.p)
+ ; CHECK-NEXT: PseudoRET
+ SW_INX killed renamable $x10_w, killed renamable $x16, 0 :: (volatile store (s32) into %ir.p)
+ PseudoRET
+
+...
+---
+name: load_common_ptr_float_no_opt
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x16
+
+ ; CHECK-LABEL: name: load_common_ptr_float_no_opt
+ ; CHECK: liveins: $x16
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $x10_w = LW_INX killed renamable $x16, 0 :: (load (s32) from %ir.g)
+ ; CHECK-NEXT: PseudoRET implicit $x10_w
+ renamable $x10_w = LW_INX killed renamable $x16, 0 :: (load (s32) from %ir.g)
+ PseudoRET implicit $x10_w
+
+...
+---
+name: store_large_offset_float_no_opt
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11_w, $x12_w
+
+ ; CHECK-LABEL: name: store_large_offset_float_no_opt
+ ; CHECK: liveins: $x10, $x11_w, $x12_w
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: SW_INX killed renamable $x11_w, renamable $x10, 400 :: (volatile store (s32) into %ir.0)
+ ; CHECK-NEXT: SW_INX killed renamable $x12_w, killed renamable $x10, 404 :: (volatile store (s32) into %ir.1)
+ ; CHECK-NEXT: PseudoRET
+ SW_INX killed renamable $x11_w, renamable $x10, 400 :: (volatile store (s32) into %ir.0)
+ SW_INX killed renamable $x12_w, killed renamable $x10, 404 :: (volatile store (s32) into %ir.1)
+ PseudoRET
+
+...
+---
+name: load_large_offset_float_no_opt
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10
+
+ ; CHECK-LABEL: name: load_large_offset_float_no_opt
+ ; CHECK: liveins: $x10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $x11_w = LW_INX renamable $x10, 400 :: (load (s32) from %ir.arrayidx)
+ ; CHECK-NEXT: renamable $x12_w = LW_INX killed renamable $x10, 404 :: (load (s32) from %ir.arrayidx1)
+ ; CHECK-NEXT: PseudoRET implicit $x11_w, implicit $x12_w
+ renamable $x11_w = LW_INX renamable $x10, 400 :: (load (s32) from %ir.arrayidx)
+ renamable $x12_w = LW_INX killed renamable $x10, 404 :: (load (s32) from %ir.arrayidx1)
+ PseudoRET implicit $x11_w, implicit $x12_w
+
+...
>From e8722ba64a5ec2f17287081b803442566aad0b8a Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Sat, 28 Sep 2024 01:02:05 -0700
Subject: [PATCH 2/6] Update llvm/lib/Target/RISCV/RISCVCallingConv.cpp
Co-authored-by: Yingwei Zheng <dtcxzyw at qq.com>
---
llvm/lib/Target/RISCV/RISCVCallingConv.cpp | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVCallingConv.cpp b/llvm/lib/Target/RISCV/RISCVCallingConv.cpp
index 6a96d16c5f02d5..e1b32581624039 100644
--- a/llvm/lib/Target/RISCV/RISCVCallingConv.cpp
+++ b/llvm/lib/Target/RISCV/RISCVCallingConv.cpp
@@ -783,8 +783,7 @@ bool llvm::CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
}
}
- if ((LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
- (LocVT == MVT::f64 && Subtarget.hasStdExtZdinx() &&
+ if ((LocVT == MVT::f64 && Subtarget.hasStdExtZdinx() &&
Subtarget.is64Bit())) {
if (MCRegister Reg = State.AllocateReg(GPRList)) {
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
>From 42807d38b3154079b98d266391fdb349c482ce1b Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Sat, 28 Sep 2024 01:02:18 -0700
Subject: [PATCH 3/6] Update llvm/lib/Target/RISCV/RISCVInstrInfoF.td
Co-authored-by: Yingwei Zheng <dtcxzyw at qq.com>
---
llvm/lib/Target/RISCV/RISCVInstrInfoF.td | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
index 1146637f106e7a..000b7cfedb0f91 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
@@ -310,7 +310,7 @@ def LW_INX : Load_ri<0b010, "lw", GPRF32>, Sched<[WriteLDW, ReadMemBase]>;
def SW_INX : Store_rri<0b010, "sw", GPRF32>,
Sched<[WriteSTW, ReadStoreData, ReadMemBase]>;
-// ADDI with GPRF16 register class to use for copy. This should not be used as
+// ADDI with GPRF32 register class to use for copy. This should not be used as
// general ADDI, so the immediate should always be zero.
let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveReg = 1,
hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
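
The comment fixed above describes the CodeGenOnly copy pseudo for GPRF32, which has to be expanded after register allocation. As a minimal sketch of what that expansion likely looks like, modeled on the existing GPRF16 handling in RISCVExpandPseudoInsts.cpp — the function name, the RISCV::sub_32 index, and the use of plain ADDI are assumptions here, not verbatim code from this patch:

bool RISCVExpandPseudo::expandMV_FPR32INX(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator MBBI) {
  const TargetRegisterInfo *TRI = STI->getRegisterInfo();
  // Widen the sub_32 operands to their parent X registers, since ADDI is
  // only defined on the full GPR class. (Names are assumptions.)
  Register DstReg = TRI->getMatchingSuperReg(
      MBBI->getOperand(0).getReg(), RISCV::sub_32, &RISCV::GPRRegClass);
  Register SrcReg = TRI->getMatchingSuperReg(
      MBBI->getOperand(1).getReg(), RISCV::sub_32, &RISCV::GPRRegClass);

  // Emit "addi rd, rs, 0", the canonical GPR move; a zero immediate is
  // exactly what the comment above requires.
  BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(RISCV::ADDI), DstReg)
      .addReg(SrcReg, getKillRegState(MBBI->getOperand(1).isKill()))
      .addImm(0);

  MBBI->eraseFromParent();
  return true;
}

Expanding to a zero-immediate ADDI keeps the pseudo rematerializable and as cheap as a move, which is why it must never be used as a general ADDI.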
>From 846f3ba8c1642bd434aac14c88f6f24ea41fcea8 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Sat, 28 Sep 2024 10:24:07 -0700
Subject: [PATCH 4/6] Update llvm/lib/Target/RISCV/RISCVCallingConv.cpp
Co-authored-by: Yingwei Zheng <dtcxzyw at qq.com>
---
llvm/lib/Target/RISCV/RISCVCallingConv.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/RISCV/RISCVCallingConv.cpp b/llvm/lib/Target/RISCV/RISCVCallingConv.cpp
index e1b32581624039..6ed61ea4741523 100644
--- a/llvm/lib/Target/RISCV/RISCVCallingConv.cpp
+++ b/llvm/lib/Target/RISCV/RISCVCallingConv.cpp
@@ -401,7 +401,7 @@ bool llvm::CC_RISCV(unsigned ValNo, MVT ValVT, MVT LocVT,
}
}
- if ((ValVT == MVT::f32 && Subtarget.hasStdExtZfinx())) {
+ if (ValVT == MVT::f32 && Subtarget.hasStdExtZfinx()) {
if (MCRegister Reg = State.AllocateReg(getArgGPR32s(ABI))) {
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
return false;
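
getArgGPR32s is referenced in this hunk but its body is not shown. A plausible sketch, by analogy with the existing GPR argument lists in RISCVCallingConv.cpp — the register names and the ILP32E/LP64E special case are assumptions, not quoted from the patch:

static ArrayRef<MCPhysReg> getArgGPR32s(const RISCVABI::ABI ABI) {
  // The 32-bit views of the usual argument registers a0-a7.
  static const MCPhysReg ArgIGPRs[] = {RISCV::X10_W, RISCV::X11_W,
                                       RISCV::X12_W, RISCV::X13_W,
                                       RISCV::X14_W, RISCV::X15_W,
                                       RISCV::X16_W, RISCV::X17_W};
  // The RVE ABIs only have six argument registers (a0-a5).
  static const MCPhysReg ArgEGPRs[] = {RISCV::X10_W, RISCV::X11_W,
                                       RISCV::X12_W, RISCV::X13_W,
                                       RISCV::X14_W, RISCV::X15_W};
  if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E)
    return ArrayRef(ArgEGPRs);
  return ArrayRef(ArgIGPRs);
}

Allocating the _W sub-registers means an f32 argument is assigned only the 32-bit view of its GPR, matching the new GPRF32 register class instead of routing through RISCVISD::FMV* nodes.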
>From aba04177fbc10d4c44075b087c3ca125ed7f0ad0 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Sat, 28 Sep 2024 10:24:17 -0700
Subject: [PATCH 5/6] Update llvm/lib/Target/RISCV/RISCVCallingConv.cpp
Co-authored-by: Yingwei Zheng <dtcxzyw at qq.com>
---
llvm/lib/Target/RISCV/RISCVCallingConv.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVCallingConv.cpp b/llvm/lib/Target/RISCV/RISCVCallingConv.cpp
index 6ed61ea4741523..4bc74b0cbddee9 100644
--- a/llvm/lib/Target/RISCV/RISCVCallingConv.cpp
+++ b/llvm/lib/Target/RISCV/RISCVCallingConv.cpp
@@ -783,8 +783,8 @@ bool llvm::CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
}
}
- if ((LocVT == MVT::f64 && Subtarget.hasStdExtZdinx() &&
- Subtarget.is64Bit())) {
+ if (LocVT == MVT::f64 && Subtarget.hasStdExtZdinx() &&
+ Subtarget.is64Bit()) {
if (MCRegister Reg = State.AllocateReg(GPRList)) {
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
return false;
>From 9e0bcd68b178af144dbfcc3a1f50e1fa35c18274 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Mon, 30 Sep 2024 13:25:32 -0700
Subject: [PATCH 6/6] fixup! clang-format
---
llvm/lib/Target/RISCV/RISCVCallingConv.cpp | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVCallingConv.cpp b/llvm/lib/Target/RISCV/RISCVCallingConv.cpp
index 4bc74b0cbddee9..7038d4e2610ced 100644
--- a/llvm/lib/Target/RISCV/RISCVCallingConv.cpp
+++ b/llvm/lib/Target/RISCV/RISCVCallingConv.cpp
@@ -783,8 +783,7 @@ bool llvm::CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
}
}
- if (LocVT == MVT::f64 && Subtarget.hasStdExtZdinx() &&
- Subtarget.is64Bit()) {
+ if (LocVT == MVT::f64 && Subtarget.hasStdExtZdinx() && Subtarget.is64Bit()) {
if (MCRegister Reg = State.AllocateReg(GPRList)) {
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
return false;