[llvm] bc91f3c - [RISCV] Add 32 bit GPR sub-register for Zfinx. (#108336)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 1 22:09:31 PDT 2024
Author: Craig Topper
Date: 2024-10-01T22:09:27-07:00
New Revision: bc91f3cdd57cbe4b0a456626f52960158cb3232f
URL: https://github.com/llvm/llvm-project/commit/bc91f3cdd57cbe4b0a456626f52960158cb3232f
DIFF: https://github.com/llvm/llvm-project/commit/bc91f3cdd57cbe4b0a456626f52960158cb3232f.diff
LOG: [RISCV] Add 32 bit GPR sub-register for Zfinx. (#108336)
This patch adds a 32-bit register class for use with Zfinx instructions. This makes them more similar to F instructions and allows us to spill only 32 bits.
I've added CodeGenOnly instructions for load/store using GPRF32 as that gave better results than insert_subreg/extract_subreg.
Function arguments use this new GPRF32 register class for f32 arguments with Zfinx, eliminating the need to use RISCVISD::FMV* nodes.
This is similar to #107446, which adds a 16-bit register class.
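As a minimal sketch of the effect on argument lowering (not part of the commit; the function name and the llc invocation are assumptions chosen for the example):

; Hypothetical example: compiled with something like
;   llc -mtriple=riscv32 -mattr=+zfinx
; the f32 arguments below are assigned directly to GPRF32 registers
; (e.g. $x10_w, $x11_w) by the calling convention, so no RISCVISD::FMV*
; nodes are needed, and a spill of such a value uses the CodeGenOnly
; SW_INX/LW_INX instructions so only 32 bits are stored.
define float @fadd_example(float %a, float %b) nounwind {
  %sum = fadd float %a, %b
  ret float %sum
}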
Added:
llvm/test/CodeGen/RISCV/make-compressible-zfinx.mir
Modified:
llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
llvm/lib/Target/RISCV/RISCVCallingConv.cpp
llvm/lib/Target/RISCV/RISCVDeadRegisterDefinitions.cpp
llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
llvm/lib/Target/RISCV/RISCVInstrInfoC.td
llvm/lib/Target/RISCV/RISCVInstrInfoF.td
llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp
llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
llvm/lib/Target/RISCV/RISCVRegisterInfo.td
llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll
llvm/test/CodeGen/RISCV/float-arith.ll
llvm/test/CodeGen/RISCV/float-bitmanip-dagcombines.ll
llvm/test/CodeGen/RISCV/float-frem.ll
llvm/test/CodeGen/RISCV/float-imm.ll
llvm/test/CodeGen/RISCV/float-intrinsics.ll
llvm/test/CodeGen/RISCV/half-intrinsics.ll
llvm/test/CodeGen/RISCV/kcfi-mir.ll
llvm/test/CodeGen/RISCV/llvm.frexp.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index fbad7d5d02db6c..78d6d7587160a3 100644
--- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -485,8 +485,14 @@ struct RISCVOperand final : public MCParsedAsmOperand {
RISCVMCRegisterClasses[RISCV::GPRF16RegClassID].contains(Reg.RegNum);
}
+ bool isGPRF32() const {
+ return Kind == KindTy::Register &&
+ RISCVMCRegisterClasses[RISCV::GPRF32RegClassID].contains(Reg.RegNum);
+ }
+
bool isGPRAsFPR() const { return isGPR() && Reg.IsGPRAsFPR; }
bool isGPRAsFPR16() const { return isGPRF16() && Reg.IsGPRAsFPR; }
+ bool isGPRAsFPR32() const { return isGPRF32() && Reg.IsGPRAsFPR; }
bool isGPRPairAsFPR() const { return isGPRPair() && Reg.IsGPRAsFPR; }
bool isGPRPair() const {
@@ -1352,6 +1358,10 @@ unsigned RISCVAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
Op.Reg.RegNum = Reg - RISCV::X0 + RISCV::X0_H;
return Match_Success;
}
+ if (Kind == MCK_GPRAsFPR32 && Op.isGPRAsFPR()) {
+ Op.Reg.RegNum = Reg - RISCV::X0 + RISCV::X0_W;
+ return Match_Success;
+ }
// There are some GPRF64AsFPR instructions that have no RV32 equivalent. We
// reject them at parsing thinking we should match as GPRPairAsFPR for RV32.
diff --git a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
index c2659a51b02096..7c8206cb44dec2 100644
--- a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
+++ b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
@@ -94,6 +94,19 @@ static DecodeStatus DecodeGPRF16RegisterClass(MCInst &Inst, uint32_t RegNo,
return MCDisassembler::Success;
}
+static DecodeStatus DecodeGPRF32RegisterClass(MCInst &Inst, uint32_t RegNo,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
+ bool IsRVE = Decoder->getSubtargetInfo().hasFeature(RISCV::FeatureStdExtE);
+
+ if (RegNo >= 32 || (IsRVE && RegNo >= 16))
+ return MCDisassembler::Fail;
+
+ MCRegister Reg = RISCV::X0_W + RegNo;
+ Inst.addOperand(MCOperand::createReg(Reg));
+ return MCDisassembler::Success;
+}
+
static DecodeStatus DecodeGPRX1X5RegisterClass(MCInst &Inst, uint32_t RegNo,
uint64_t Address,
const MCDisassembler *Decoder) {
diff --git a/llvm/lib/Target/RISCV/RISCVCallingConv.cpp b/llvm/lib/Target/RISCV/RISCVCallingConv.cpp
index d610f0b956027a..d3bfbb0943766e 100644
--- a/llvm/lib/Target/RISCV/RISCVCallingConv.cpp
+++ b/llvm/lib/Target/RISCV/RISCVCallingConv.cpp
@@ -156,6 +156,23 @@ static ArrayRef<MCPhysReg> getArgGPR16s(const RISCVABI::ABI ABI) {
return ArrayRef(ArgIGPRs);
}
+static ArrayRef<MCPhysReg> getArgGPR32s(const RISCVABI::ABI ABI) {
+ // The GPRs used for passing arguments in the ILP32* and LP64* ABIs, except
+ // the ILP32E ABI.
+ static const MCPhysReg ArgIGPRs[] = {RISCV::X10_W, RISCV::X11_W, RISCV::X12_W,
+ RISCV::X13_W, RISCV::X14_W, RISCV::X15_W,
+ RISCV::X16_W, RISCV::X17_W};
+ // The GPRs used for passing arguments in the ILP32E/LP64E ABI.
+ static const MCPhysReg ArgEGPRs[] = {RISCV::X10_W, RISCV::X11_W,
+ RISCV::X12_W, RISCV::X13_W,
+ RISCV::X14_W, RISCV::X15_W};
+
+ if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E)
+ return ArrayRef(ArgEGPRs);
+
+ return ArrayRef(ArgIGPRs);
+}
+
static ArrayRef<MCPhysReg> getFastCCArgGPRs(const RISCVABI::ABI ABI) {
// The GPRs used for passing arguments in the FastCC, X5 and X6 might be used
// for save-restore libcall, so we don't use them.
@@ -194,6 +211,26 @@ static ArrayRef<MCPhysReg> getFastCCArgGPRF16s(const RISCVABI::ABI ABI) {
return ArrayRef(FastCCIGPRs);
}
+static ArrayRef<MCPhysReg> getFastCCArgGPRF32s(const RISCVABI::ABI ABI) {
+ // The GPRs used for passing arguments in the FastCC, X5 and X6 might be used
+ // for save-restore libcall, so we don't use them.
+ // Don't use X7 for fastcc, since Zicfilp uses X7 as the label register.
+ static const MCPhysReg FastCCIGPRs[] = {
+ RISCV::X10_W, RISCV::X11_W, RISCV::X12_W, RISCV::X13_W,
+ RISCV::X14_W, RISCV::X15_W, RISCV::X16_W, RISCV::X17_W,
+ RISCV::X28_W, RISCV::X29_W, RISCV::X30_W, RISCV::X31_W};
+
+ // The GPRs used for passing arguments in the FastCC when using ILP32E/LP64E.
+ static const MCPhysReg FastCCEGPRs[] = {RISCV::X10_W, RISCV::X11_W,
+ RISCV::X12_W, RISCV::X13_W,
+ RISCV::X14_W, RISCV::X15_W};
+
+ if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E)
+ return ArrayRef(FastCCEGPRs);
+
+ return ArrayRef(FastCCIGPRs);
+}
+
// Pass a 2*XLEN argument that has been split into two XLEN values through
// registers or the stack as necessary.
static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
@@ -364,11 +401,17 @@ bool llvm::CC_RISCV(unsigned ValNo, MVT ValVT, MVT LocVT,
}
}
+ if (ValVT == MVT::f32 && Subtarget.hasStdExtZfinx()) {
+ if (MCRegister Reg = State.AllocateReg(getArgGPR32s(ABI))) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return false;
+ }
+ }
+
ArrayRef<MCPhysReg> ArgGPRs = RISCV::getArgGPRs(ABI);
- // Zfinx/Zdinx use GPR without a bitcast when possible.
- if ((LocVT == MVT::f32 && XLen == 32 && Subtarget.hasStdExtZfinx()) ||
- (LocVT == MVT::f64 && XLen == 64 && Subtarget.hasStdExtZdinx())) {
+ // Zdinx use GPR without a bitcast when possible.
+ if (LocVT == MVT::f64 && XLen == 64 && Subtarget.hasStdExtZdinx()) {
if (MCRegister Reg = State.AllocateReg(ArgGPRs)) {
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
return false;
@@ -616,10 +659,16 @@ bool llvm::CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT,
}
}
+ // Check if there is an available GPRF32 before hitting the stack.
+ if (LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) {
+ if (MCRegister Reg = State.AllocateReg(getFastCCArgGPRF32s(ABI))) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return false;
+ }
+ }
+
// Check if there is an available GPR before hitting the stack.
- if ((LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
- (LocVT == MVT::f64 && Subtarget.is64Bit() &&
- Subtarget.hasStdExtZdinx())) {
+ if (LocVT == MVT::f64 && Subtarget.is64Bit() && Subtarget.hasStdExtZdinx()) {
if (MCRegister Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
if (LocVT.getSizeInBits() != Subtarget.getXLen()) {
LocVT = XLenVT;
@@ -723,9 +772,18 @@ bool llvm::CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
}
}
- if ((LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
- (LocVT == MVT::f64 && Subtarget.hasStdExtZdinx() &&
- Subtarget.is64Bit())) {
+ if (LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) {
+ static const MCPhysReg GPR32List[] = {
+ RISCV::X9_W, RISCV::X18_W, RISCV::X19_W, RISCV::X20_W,
+ RISCV::X21_W, RISCV::X22_W, RISCV::X23_W, RISCV::X24_W,
+ RISCV::X25_W, RISCV::X26_W, RISCV::X27_W};
+ if (MCRegister Reg = State.AllocateReg(GPR32List)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return false;
+ }
+ }
+
+ if (LocVT == MVT::f64 && Subtarget.hasStdExtZdinx() && Subtarget.is64Bit()) {
if (MCRegister Reg = State.AllocateReg(GPRList)) {
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
return false;
diff --git a/llvm/lib/Target/RISCV/RISCVDeadRegisterDefinitions.cpp b/llvm/lib/Target/RISCV/RISCVDeadRegisterDefinitions.cpp
index 713c7a0661defe..d913c0b201a20c 100644
--- a/llvm/lib/Target/RISCV/RISCVDeadRegisterDefinitions.cpp
+++ b/llvm/lib/Target/RISCV/RISCVDeadRegisterDefinitions.cpp
@@ -97,6 +97,8 @@ bool RISCVDeadRegisterDefinitions::runOnMachineFunction(MachineFunction &MF) {
const TargetRegisterClass *RC = TII->getRegClass(Desc, I, TRI, MF);
if (RC && RC->contains(RISCV::X0)) {
X0Reg = RISCV::X0;
+ } else if (RC && RC->contains(RISCV::X0_W)) {
+ X0Reg = RISCV::X0_W;
} else if (RC && RC->contains(RISCV::X0_H)) {
X0Reg = RISCV::X0_H;
} else {
diff --git a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
index 2501256ca6adf0..5dcec078856ead 100644
--- a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
@@ -50,6 +50,8 @@ class RISCVExpandPseudo : public MachineFunctionPass {
MachineBasicBlock::iterator MBBI, unsigned Opcode);
bool expandMV_FPR16INX(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI);
+ bool expandMV_FPR32INX(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI);
bool expandRV32ZdinxStore(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI);
bool expandRV32ZdinxLoad(MachineBasicBlock &MBB,
@@ -108,6 +110,8 @@ bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB,
switch (MBBI->getOpcode()) {
case RISCV::PseudoMV_FPR16INX:
return expandMV_FPR16INX(MBB, MBBI);
+ case RISCV::PseudoMV_FPR32INX:
+ return expandMV_FPR32INX(MBB, MBBI);
case RISCV::PseudoRV32ZdinxSD:
return expandRV32ZdinxStore(MBB, MBBI);
case RISCV::PseudoRV32ZdinxLD:
@@ -287,6 +291,23 @@ bool RISCVExpandPseudo::expandMV_FPR16INX(MachineBasicBlock &MBB,
return true;
}
+bool RISCVExpandPseudo::expandMV_FPR32INX(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) {
+ DebugLoc DL = MBBI->getDebugLoc();
+ const TargetRegisterInfo *TRI = STI->getRegisterInfo();
+ Register DstReg = TRI->getMatchingSuperReg(
+ MBBI->getOperand(0).getReg(), RISCV::sub_32, &RISCV::GPRRegClass);
+ Register SrcReg = TRI->getMatchingSuperReg(
+ MBBI->getOperand(1).getReg(), RISCV::sub_32, &RISCV::GPRRegClass);
+
+ BuildMI(MBB, MBBI, DL, TII->get(RISCV::ADDI), DstReg)
+ .addReg(SrcReg, getKillRegState(MBBI->getOperand(1).isKill()))
+ .addImm(0);
+
+ MBBI->eraseFromParent(); // The pseudo instruction is gone now.
+ return true;
+}
+
// This function expands the PseudoRV32ZdinxSD for storing a double-precision
// floating-point value into memory by generating an equivalent instruction
// sequence for RV32.
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 23479c2edf1d91..3e3f3c2eca1468 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -931,6 +931,9 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
if (VT.SimpleTy == MVT::f16 && Opc == RISCV::COPY) {
Res =
CurDAG->getTargetExtractSubreg(RISCV::sub_16, DL, VT, Imm).getNode();
+ } else if (VT.SimpleTy == MVT::f32 && Opc == RISCV::COPY) {
+ Res =
+ CurDAG->getTargetExtractSubreg(RISCV::sub_32, DL, VT, Imm).getNode();
} else if (Opc == RISCV::FCVT_D_W_IN32X || Opc == RISCV::FCVT_D_W)
Res = CurDAG->getMachineNode(
Opc, DL, VT, Imm,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 91503bd7f41f3c..b8539a5d1add14 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -110,6 +110,7 @@ Register RISCVInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
MemBytes = 2;
break;
case RISCV::LW:
+ case RISCV::LW_INX:
case RISCV::FLW:
case RISCV::LWU:
MemBytes = 4;
@@ -150,6 +151,7 @@ Register RISCVInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
MemBytes = 2;
break;
case RISCV::SW:
+ case RISCV::SW_INX:
case RISCV::FSW:
MemBytes = 4;
break;
@@ -471,6 +473,13 @@ void RISCVInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
return;
}
+ if (RISCV::GPRF32RegClass.contains(DstReg, SrcReg)) {
+ BuildMI(MBB, MBBI, DL, get(RISCV::PseudoMV_FPR32INX), DstReg)
+ .addReg(SrcReg,
+ getKillRegState(KillSrc) | getRenamableRegState(RenamableSrc));
+ return;
+ }
+
if (RISCV::GPRPairRegClass.contains(DstReg, SrcReg)) {
// Emit an ADDI for both parts of GPRPair.
BuildMI(MBB, MBBI, DL, get(RISCV::ADDI),
@@ -595,6 +604,9 @@ void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
} else if (RISCV::GPRF16RegClass.hasSubClassEq(RC)) {
Opcode = RISCV::SH_INX;
IsScalableVector = false;
+ } else if (RISCV::GPRF32RegClass.hasSubClassEq(RC)) {
+ Opcode = RISCV::SW_INX;
+ IsScalableVector = false;
} else if (RISCV::GPRPairRegClass.hasSubClassEq(RC)) {
Opcode = RISCV::PseudoRV32ZdinxSD;
IsScalableVector = false;
@@ -681,6 +693,9 @@ void RISCVInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
} else if (RISCV::GPRF16RegClass.hasSubClassEq(RC)) {
Opcode = RISCV::LH_INX;
IsScalableVector = false;
+ } else if (RISCV::GPRF32RegClass.hasSubClassEq(RC)) {
+ Opcode = RISCV::LW_INX;
+ IsScalableVector = false;
} else if (RISCV::GPRPairRegClass.hasSubClassEq(RC)) {
Opcode = RISCV::PseudoRV32ZdinxLD;
IsScalableVector = false;
@@ -1554,6 +1569,7 @@ unsigned RISCVInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
switch (Opcode) {
case RISCV::PseudoMV_FPR16INX:
+ case RISCV::PseudoMV_FPR32INX:
// MV is always compressible to either c.mv or c.li rd, 0.
return STI.hasStdExtCOrZca() ? 2 : 4;
case TargetOpcode::STACKMAP:
@@ -2614,6 +2630,7 @@ bool RISCVInstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg,
case RISCV::LH_INX:
case RISCV::LHU:
case RISCV::LW:
+ case RISCV::LW_INX:
case RISCV::LWU:
case RISCV::LD:
case RISCV::FLH:
@@ -2623,6 +2640,7 @@ bool RISCVInstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg,
case RISCV::SH:
case RISCV::SH_INX:
case RISCV::SW:
+ case RISCV::SW_INX:
case RISCV::SD:
case RISCV::FSH:
case RISCV::FSW:
@@ -2692,9 +2710,11 @@ bool RISCVInstrInfo::getMemOperandsWithOffsetWidth(
case RISCV::SH_INX:
case RISCV::FSH:
case RISCV::LW:
+ case RISCV::LW_INX:
case RISCV::LWU:
case RISCV::FLW:
case RISCV::SW:
+ case RISCV::SW_INX:
case RISCV::FSW:
case RISCV::LD:
case RISCV::FLD:
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
index 3f279b7a58ca68..7d742322b42969 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
@@ -331,6 +331,15 @@ def C_LW : CLoad_ri<0b010, "c.lw", GPRC, uimm7_lsb00>,
let Inst{5} = imm{6};
}
+let isCodeGenOnly = 1 in
+def C_LW_INX : CLoad_ri<0b010, "c.lw", GPRF32C, uimm7_lsb00>,
+ Sched<[WriteLDW, ReadMemBase]> {
+ bits<7> imm;
+ let Inst{12-10} = imm{5-3};
+ let Inst{6} = imm{2};
+ let Inst{5} = imm{6};
+}
+
let DecoderNamespace = "RISCV32Only_",
Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] in
def C_FLW : CLoad_ri<0b011, "c.flw", FPR32C, uimm7_lsb00>,
@@ -365,6 +374,15 @@ def C_SW : CStore_rri<0b110, "c.sw", GPRC, uimm7_lsb00>,
let Inst{5} = imm{6};
}
+let isCodeGenOnly = 1 in
+def C_SW_INX : CStore_rri<0b110, "c.sw", GPRF32C, uimm7_lsb00>,
+ Sched<[WriteSTW, ReadStoreData, ReadMemBase]> {
+ bits<7> imm;
+ let Inst{12-10} = imm{5-3};
+ let Inst{6} = imm{2};
+ let Inst{5} = imm{6};
+}
+
let DecoderNamespace = "RISCV32Only_",
Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] in
def C_FSW : CStore_rri<0b111, "c.fsw", FPR32C, uimm7_lsb00>,
@@ -517,6 +535,13 @@ def C_LWSP : CStackLoad<0b010, "c.lwsp", GPRNoX0, uimm8_lsb00>,
let Inst{3-2} = imm{7-6};
}
+let isCodeGenOnly = 1 in
+def C_LWSP_INX : CStackLoad<0b010, "c.lwsp", GPRF32NoX0, uimm8_lsb00>,
+ Sched<[WriteLDW, ReadMemBase]> {
+ let Inst{6-4} = imm{4-2};
+ let Inst{3-2} = imm{7-6};
+}
+
let DecoderNamespace = "RISCV32Only_",
Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] in
def C_FLWSP : CStackLoad<0b011, "c.flwsp", FPR32, uimm8_lsb00>,
@@ -575,6 +600,13 @@ def C_SWSP : CStackStore<0b110, "c.swsp", GPR, uimm8_lsb00>,
let Inst{8-7} = imm{7-6};
}
+let isCodeGenOnly = 1 in
+def C_SWSP_INX : CStackStore<0b110, "c.swsp", GPRF32, uimm8_lsb00>,
+ Sched<[WriteSTW, ReadStoreData, ReadMemBase]> {
+ let Inst{12-9} = imm{5-2};
+ let Inst{8-7} = imm{7-6};
+}
+
let DecoderNamespace = "RISCV32Only_",
Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] in
def C_FSWSP : CStackStore<0b111, "c.fswsp", FPR32, uimm8_lsb00>,
@@ -869,6 +901,10 @@ def : CompressPat<(FLD FPR64C:$rd, GPRCMem:$rs1, uimm8_lsb000:$imm),
let Predicates = [HasStdExtCOrZca] in {
def : CompressPat<(LW GPRC:$rd, GPRCMem:$rs1, uimm7_lsb00:$imm),
(C_LW GPRC:$rd, GPRCMem:$rs1, uimm7_lsb00:$imm)>;
+
+let isCompressOnly = true in
+def : CompressPat<(LW_INX GPRF32C:$rd, GPRCMem:$rs1, uimm7_lsb00:$imm),
+ (C_LW_INX GPRF32C:$rd, GPRCMem:$rs1, uimm7_lsb00:$imm)>;
} // Predicates = [HasStdExtCOrZca]
let Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] in {
@@ -889,6 +925,10 @@ def : CompressPat<(FSD FPR64C:$rs2, GPRCMem:$rs1, uimm8_lsb000:$imm),
let Predicates = [HasStdExtCOrZca] in {
def : CompressPat<(SW GPRC:$rs2, GPRCMem:$rs1, uimm7_lsb00:$imm),
(C_SW GPRC:$rs2, GPRCMem:$rs1, uimm7_lsb00:$imm)>;
+
+let isCompressOnly = true in
+def : CompressPat<(SW_INX GPRF32C:$rs2, GPRCMem:$rs1, uimm7_lsb00:$imm),
+ (C_SW_INX GPRF32C:$rs2, GPRCMem:$rs1, uimm7_lsb00:$imm)>;
} // Predicates = [HasStdExtCOrZca]
let Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] in {
@@ -992,6 +1032,10 @@ def : CompressPat<(FLD FPR64:$rd, SPMem:$rs1, uimm9_lsb000:$imm),
let Predicates = [HasStdExtCOrZca] in {
def : CompressPat<(LW GPRNoX0:$rd, SPMem:$rs1, uimm8_lsb00:$imm),
(C_LWSP GPRNoX0:$rd, SPMem:$rs1, uimm8_lsb00:$imm)>;
+
+let isCompressOnly = true in
+def : CompressPat<(LW_INX GPRF32NoX0:$rd, SPMem:$rs1, uimm8_lsb00:$imm),
+ (C_LWSP_INX GPRF32NoX0:$rd, SPMem:$rs1, uimm8_lsb00:$imm)>;
} // Predicates = [HasStdExtCOrZca]
let Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] in {
@@ -1034,6 +1078,10 @@ def : CompressPat<(FSD FPR64:$rs2, SPMem:$rs1, uimm9_lsb000:$imm),
let Predicates = [HasStdExtCOrZca] in {
def : CompressPat<(SW GPR:$rs2, SPMem:$rs1, uimm8_lsb00:$imm),
(C_SWSP GPR:$rs2, SPMem:$rs1, uimm8_lsb00:$imm)>;
+
+let isCompressOnly = true in
+def : CompressPat<(SW_INX GPRF32:$rs2, SPMem:$rs1, uimm8_lsb00:$imm),
+ (C_SWSP_INX GPRF32:$rs2, SPMem:$rs1, uimm8_lsb00:$imm)>;
} // Predicates = [HasStdExtCOrZca]
let Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] in {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
index a00acb372dc2a2..000b7cfedb0f91 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
@@ -83,15 +83,14 @@ def any_fma_nsz : PatFrag<(ops node:$rs1, node:$rs2, node:$rs3),
// Zfinx
-def GPRAsFPR : AsmOperandClass {
- let Name = "GPRAsFPR";
+def GPRAsFPR32 : AsmOperandClass {
+ let Name = "GPRAsFPR32";
let ParserMethod = "parseGPRAsFPR";
let RenderMethod = "addRegOperands";
}
def FPR32INX : RegisterOperand<GPRF32> {
- let ParserMatchClass = GPRAsFPR;
- let DecoderMethod = "DecodeGPRRegisterClass";
+ let ParserMatchClass = GPRAsFPR32;
}
// Describes a combination of predicates from F/D/Zfh/Zfhmin or
@@ -306,6 +305,19 @@ def FLW : FPLoad_r<0b010, "flw", FPR32, WriteFLD32>;
def FSW : FPStore_r<0b010, "fsw", FPR32, WriteFST32>;
} // Predicates = [HasStdExtF]
+let Predicates = [HasStdExtZfinx], isCodeGenOnly = 1 in {
+def LW_INX : Load_ri<0b010, "lw", GPRF32>, Sched<[WriteLDW, ReadMemBase]>;
+def SW_INX : Store_rri<0b010, "sw", GPRF32>,
+ Sched<[WriteSTW, ReadStoreData, ReadMemBase]>;
+
+// ADDI with GPRF32 register class to use for copy. This should not be used as
+// general ADDI, so the immediate should always be zero.
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveReg = 1,
+ hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+def PseudoMV_FPR32INX : Pseudo<(outs GPRF32:$rd), (ins GPRF32:$rs), []>,
+ Sched<[WriteIALU, ReadIALU]>;
+}
+
foreach Ext = FExts in {
let SchedRW = [WriteFMA32, ReadFMA32, ReadFMA32, ReadFMA32Addend] in {
defm FMADD_S : FPFMA_rrr_frm_m<OPC_MADD, 0b00, "fmadd.s", Ext>;
@@ -685,12 +697,10 @@ defm Select_FPR32INX : SelectCC_GPR_rrirr<FPR32INX, f32>;
def PseudoFROUND_S_INX : PseudoFROUND<FPR32INX, f32>;
/// Loads
-def : Pat<(f32 (load (AddrRegImm (XLenVT GPR:$rs1), simm12:$imm12))),
- (COPY_TO_REGCLASS (LW GPR:$rs1, simm12:$imm12), GPRF32)>;
+def : LdPat<load, LW_INX, f32>;
/// Stores
-def : Pat<(store (f32 FPR32INX:$rs2), (AddrRegImm (XLenVT GPR:$rs1), simm12:$imm12)),
- (SW (COPY_TO_REGCLASS FPR32INX:$rs2, GPR), GPR:$rs1, simm12:$imm12)>;
+def : StPat<store, SW_INX, GPRF32, f32>;
} // Predicates = [HasStdExtZfinx]
let Predicates = [HasStdExtF] in {
@@ -701,8 +711,8 @@ def : Pat<(i32 (bitconvert FPR32:$rs1)), (FMV_X_W FPR32:$rs1)>;
let Predicates = [HasStdExtZfinx] in {
// Moves (no conversion)
-def : Pat<(f32 (bitconvert (i32 GPR:$rs1))), (COPY_TO_REGCLASS GPR:$rs1, GPRF32)>;
-def : Pat<(i32 (bitconvert FPR32INX:$rs1)), (COPY_TO_REGCLASS FPR32INX:$rs1, GPR)>;
+def : Pat<(f32 (bitconvert (i32 GPR:$rs1))), (EXTRACT_SUBREG GPR:$rs1, sub_32)>;
+def : Pat<(i32 (bitconvert FPR32INX:$rs1)), (INSERT_SUBREG (XLenVT (IMPLICIT_DEF)), FPR32INX:$rs1, sub_32)>;
} // Predicates = [HasStdExtZfinx]
let Predicates = [HasStdExtF] in {
@@ -781,8 +791,8 @@ def : Pat<(any_uint_to_fp (i64 GPR:$rs1)), (FCVT_S_LU $rs1, FRM_DYN)>;
let Predicates = [HasStdExtZfinx, IsRV64] in {
// Moves (no conversion)
-def : Pat<(riscv_fmv_w_x_rv64 GPR:$src), (COPY_TO_REGCLASS GPR:$src, GPRF32)>;
-def : Pat<(riscv_fmv_x_anyextw_rv64 GPRF32:$src), (COPY_TO_REGCLASS GPRF32:$src, GPR)>;
+def : Pat<(riscv_fmv_w_x_rv64 GPR:$src), (EXTRACT_SUBREG GPR:$src, sub_32)>;
+def : Pat<(riscv_fmv_x_anyextw_rv64 GPRF32:$src), (INSERT_SUBREG (XLenVT (IMPLICIT_DEF)), FPR32INX:$src, sub_32)>;
// Use target specific isd nodes to help us remember the result is sign
// extended. Matching sext_inreg+fptoui/fptosi may cause the conversion to be
diff --git a/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp b/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp
index 5973e5bf2e5252..df5501e37f8313 100644
--- a/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp
+++ b/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp
@@ -109,7 +109,9 @@ static unsigned log2LdstWidth(unsigned Opcode) {
case RISCV::SH_INX:
return 1;
case RISCV::LW:
+ case RISCV::LW_INX:
case RISCV::SW:
+ case RISCV::SW_INX:
case RISCV::FLW:
case RISCV::FSW:
return 2;
@@ -136,7 +138,9 @@ static unsigned offsetMask(unsigned Opcode) {
case RISCV::SH_INX:
return maskTrailingOnes<unsigned>(1U);
case RISCV::LW:
+ case RISCV::LW_INX:
case RISCV::SW:
+ case RISCV::SW_INX:
case RISCV::FLW:
case RISCV::FSW:
case RISCV::LD:
@@ -178,6 +182,7 @@ static int64_t getBaseAdjustForCompression(int64_t Offset, unsigned Opcode) {
static bool isCompressedReg(Register Reg) {
return RISCV::GPRCRegClass.contains(Reg) ||
RISCV::GPRF16CRegClass.contains(Reg) ||
+ RISCV::GPRF32CRegClass.contains(Reg) ||
RISCV::FPR32CRegClass.contains(Reg) ||
RISCV::FPR64CRegClass.contains(Reg);
}
@@ -195,6 +200,7 @@ static bool isCompressibleLoad(const MachineInstr &MI) {
case RISCV::LHU:
return STI.hasStdExtZcb();
case RISCV::LW:
+ case RISCV::LW_INX:
case RISCV::LD:
return STI.hasStdExtCOrZca();
case RISCV::FLW:
@@ -216,6 +222,7 @@ static bool isCompressibleStore(const MachineInstr &MI) {
case RISCV::SH_INX:
return STI.hasStdExtZcb();
case RISCV::SW:
+ case RISCV::SW_INX:
case RISCV::SD:
return STI.hasStdExtCOrZca();
case RISCV::FSW:
@@ -329,6 +336,8 @@ static Register analyzeCompressibleUses(MachineInstr &FirstMI,
RCToScavenge = &RISCV::GPRCRegClass;
else if (RISCV::GPRF16RegClass.contains(RegImm.Reg))
RCToScavenge = &RISCV::GPRF16CRegClass;
+ else if (RISCV::GPRF32RegClass.contains(RegImm.Reg))
+ RCToScavenge = &RISCV::GPRF32CRegClass;
else if (RISCV::FPR32RegClass.contains(RegImm.Reg))
RCToScavenge = &RISCV::FPR32CRegClass;
else if (RISCV::FPR64RegClass.contains(RegImm.Reg))
@@ -424,6 +433,11 @@ bool RISCVMakeCompressibleOpt::runOnMachineFunction(MachineFunction &Fn) {
BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(RISCV::PseudoMV_FPR16INX),
NewReg)
.addReg(RegImm.Reg);
+ } else if (RISCV::GPRF32RegClass.contains(RegImm.Reg)) {
+ assert(RegImm.Imm == 0);
+ BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(RISCV::PseudoMV_FPR32INX),
+ NewReg)
+ .addReg(RegImm.Reg);
} else {
// If we are looking at replacing an FPR register we don't expect to
// have any offset. The only compressible FP instructions with an offset
diff --git a/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp b/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
index b3a2877edde4e3..a324deb4e48f5c 100644
--- a/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
+++ b/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
@@ -387,6 +387,7 @@ bool RISCVMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi,
case RISCV::LH:
case RISCV::LH_INX:
case RISCV::LW:
+ case RISCV::LW_INX:
case RISCV::LBU:
case RISCV::LHU:
case RISCV::LWU:
@@ -398,6 +399,7 @@ bool RISCVMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi,
case RISCV::SH:
case RISCV::SH_INX:
case RISCV::SW:
+ case RISCV::SW_INX:
case RISCV::SD:
case RISCV::FSH:
case RISCV::FSW:
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
index e3c9ac52d66a35..33363aa8b71830 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
@@ -124,41 +124,81 @@ let RegAltNameIndices = [ABIRegAltName] in {
let SubRegIndices = [sub_16] in {
let isConstant = true in
- def X0 : RISCVRegWithSubRegs<0, "x0", [X0_H], ["zero"]>, DwarfRegNum<[0]>;
+ def X0_W : RISCVRegWithSubRegs<0, "x0", [X0_H], ["zero"]>;
let CostPerUse = [0, 1] in {
- def X1 : RISCVRegWithSubRegs<1, "x1", [X1_H], ["ra"]>, DwarfRegNum<[1]>;
- def X2 : RISCVRegWithSubRegs<2, "x2", [X2_H], ["sp"]>, DwarfRegNum<[2]>;
- def X3 : RISCVRegWithSubRegs<3, "x3", [X3_H], ["gp"]>, DwarfRegNum<[3]>;
- def X4 : RISCVRegWithSubRegs<4, "x4", [X4_H], ["tp"]>, DwarfRegNum<[4]>;
- def X5 : RISCVRegWithSubRegs<5, "x5", [X5_H], ["t0"]>, DwarfRegNum<[5]>;
- def X6 : RISCVRegWithSubRegs<6, "x6", [X6_H], ["t1"]>, DwarfRegNum<[6]>;
- def X7 : RISCVRegWithSubRegs<7, "x7", [X7_H], ["t2"]>, DwarfRegNum<[7]>;
+ def X1_W : RISCVRegWithSubRegs<1, "x1", [X1_H], ["ra"]>;
+ def X2_W : RISCVRegWithSubRegs<2, "x2", [X2_H], ["sp"]>;
+ def X3_W : RISCVRegWithSubRegs<3, "x3", [X3_H], ["gp"]>;
+ def X4_W : RISCVRegWithSubRegs<4, "x4", [X4_H], ["tp"]>;
+ def X5_W : RISCVRegWithSubRegs<5, "x5", [X5_H], ["t0"]>;
+ def X6_W : RISCVRegWithSubRegs<6, "x6", [X6_H], ["t1"]>;
+ def X7_W : RISCVRegWithSubRegs<7, "x7", [X7_H], ["t2"]>;
}
- def X8 : RISCVRegWithSubRegs<8, "x8", [X8_H], ["s0", "fp"]>, DwarfRegNum<[8]>;
- def X9 : RISCVRegWithSubRegs<9, "x9", [X9_H], ["s1"]>, DwarfRegNum<[9]>;
- def X10 : RISCVRegWithSubRegs<10,"x10", [X10_H], ["a0"]>, DwarfRegNum<[10]>;
- def X11 : RISCVRegWithSubRegs<11,"x11", [X11_H], ["a1"]>, DwarfRegNum<[11]>;
- def X12 : RISCVRegWithSubRegs<12,"x12", [X12_H], ["a2"]>, DwarfRegNum<[12]>;
- def X13 : RISCVRegWithSubRegs<13,"x13", [X13_H], ["a3"]>, DwarfRegNum<[13]>;
- def X14 : RISCVRegWithSubRegs<14,"x14", [X14_H], ["a4"]>, DwarfRegNum<[14]>;
- def X15 : RISCVRegWithSubRegs<15,"x15", [X15_H], ["a5"]>, DwarfRegNum<[15]>;
+ def X8_W : RISCVRegWithSubRegs<8, "x8", [X8_H], ["s0", "fp"]>;
+ def X9_W : RISCVRegWithSubRegs<9, "x9", [X9_H], ["s1"]>;
+ def X10_W : RISCVRegWithSubRegs<10,"x10", [X10_H], ["a0"]>;
+ def X11_W : RISCVRegWithSubRegs<11,"x11", [X11_H], ["a1"]>;
+ def X12_W : RISCVRegWithSubRegs<12,"x12", [X12_H], ["a2"]>;
+ def X13_W : RISCVRegWithSubRegs<13,"x13", [X13_H], ["a3"]>;
+ def X14_W : RISCVRegWithSubRegs<14,"x14", [X14_H], ["a4"]>;
+ def X15_W : RISCVRegWithSubRegs<15,"x15", [X15_H], ["a5"]>;
let CostPerUse = [0, 1] in {
- def X16 : RISCVRegWithSubRegs<16,"x16", [X16_H], ["a6"]>, DwarfRegNum<[16]>;
- def X17 : RISCVRegWithSubRegs<17,"x17", [X17_H], ["a7"]>, DwarfRegNum<[17]>;
- def X18 : RISCVRegWithSubRegs<18,"x18", [X18_H], ["s2"]>, DwarfRegNum<[18]>;
- def X19 : RISCVRegWithSubRegs<19,"x19", [X19_H], ["s3"]>, DwarfRegNum<[19]>;
- def X20 : RISCVRegWithSubRegs<20,"x20", [X20_H], ["s4"]>, DwarfRegNum<[20]>;
- def X21 : RISCVRegWithSubRegs<21,"x21", [X21_H], ["s5"]>, DwarfRegNum<[21]>;
- def X22 : RISCVRegWithSubRegs<22,"x22", [X22_H], ["s6"]>, DwarfRegNum<[22]>;
- def X23 : RISCVRegWithSubRegs<23,"x23", [X23_H], ["s7"]>, DwarfRegNum<[23]>;
- def X24 : RISCVRegWithSubRegs<24,"x24", [X24_H], ["s8"]>, DwarfRegNum<[24]>;
- def X25 : RISCVRegWithSubRegs<25,"x25", [X25_H], ["s9"]>, DwarfRegNum<[25]>;
- def X26 : RISCVRegWithSubRegs<26,"x26", [X26_H], ["s10"]>, DwarfRegNum<[26]>;
- def X27 : RISCVRegWithSubRegs<27,"x27", [X27_H], ["s11"]>, DwarfRegNum<[27]>;
- def X28 : RISCVRegWithSubRegs<28,"x28", [X28_H], ["t3"]>, DwarfRegNum<[28]>;
- def X29 : RISCVRegWithSubRegs<29,"x29", [X29_H], ["t4"]>, DwarfRegNum<[29]>;
- def X30 : RISCVRegWithSubRegs<30,"x30", [X30_H], ["t5"]>, DwarfRegNum<[30]>;
- def X31 : RISCVRegWithSubRegs<31,"x31", [X31_H], ["t6"]>, DwarfRegNum<[31]>;
+ def X16_W : RISCVRegWithSubRegs<16,"x16", [X16_H], ["a6"]>;
+ def X17_W : RISCVRegWithSubRegs<17,"x17", [X17_H], ["a7"]>;
+ def X18_W : RISCVRegWithSubRegs<18,"x18", [X18_H], ["s2"]>;
+ def X19_W : RISCVRegWithSubRegs<19,"x19", [X19_H], ["s3"]>;
+ def X20_W : RISCVRegWithSubRegs<20,"x20", [X20_H], ["s4"]>;
+ def X21_W : RISCVRegWithSubRegs<21,"x21", [X21_H], ["s5"]>;
+ def X22_W : RISCVRegWithSubRegs<22,"x22", [X22_H], ["s6"]>;
+ def X23_W : RISCVRegWithSubRegs<23,"x23", [X23_H], ["s7"]>;
+ def X24_W : RISCVRegWithSubRegs<24,"x24", [X24_H], ["s8"]>;
+ def X25_W : RISCVRegWithSubRegs<25,"x25", [X25_H], ["s9"]>;
+ def X26_W : RISCVRegWithSubRegs<26,"x26", [X26_H], ["s10"]>;
+ def X27_W : RISCVRegWithSubRegs<27,"x27", [X27_H], ["s11"]>;
+ def X28_W : RISCVRegWithSubRegs<28,"x28", [X28_H], ["t3"]>;
+ def X29_W : RISCVRegWithSubRegs<29,"x29", [X29_H], ["t4"]>;
+ def X30_W : RISCVRegWithSubRegs<30,"x30", [X30_H], ["t5"]>;
+ def X31_W : RISCVRegWithSubRegs<31,"x31", [X31_H], ["t6"]>;
+ }
+ }
+
+ let SubRegIndices = [sub_32] in {
+ let isConstant = true in
+ def X0 : RISCVRegWithSubRegs<0, "x0", [X0_W], ["zero"]>, DwarfRegNum<[0]>;
+ let CostPerUse = [0, 1] in {
+ def X1 : RISCVRegWithSubRegs<1, "x1", [X1_W], ["ra"]>, DwarfRegNum<[1]>;
+ def X2 : RISCVRegWithSubRegs<2, "x2", [X2_W], ["sp"]>, DwarfRegNum<[2]>;
+ def X3 : RISCVRegWithSubRegs<3, "x3", [X3_W], ["gp"]>, DwarfRegNum<[3]>;
+ def X4 : RISCVRegWithSubRegs<4, "x4", [X4_W], ["tp"]>, DwarfRegNum<[4]>;
+ def X5 : RISCVRegWithSubRegs<5, "x5", [X5_W], ["t0"]>, DwarfRegNum<[5]>;
+ def X6 : RISCVRegWithSubRegs<6, "x6", [X6_W], ["t1"]>, DwarfRegNum<[6]>;
+ def X7 : RISCVRegWithSubRegs<7, "x7", [X7_W], ["t2"]>, DwarfRegNum<[7]>;
+ }
+ def X8 : RISCVRegWithSubRegs<8, "x8", [X8_W], ["s0", "fp"]>, DwarfRegNum<[8]>;
+ def X9 : RISCVRegWithSubRegs<9, "x9", [X9_W], ["s1"]>, DwarfRegNum<[9]>;
+ def X10 : RISCVRegWithSubRegs<10,"x10", [X10_W], ["a0"]>, DwarfRegNum<[10]>;
+ def X11 : RISCVRegWithSubRegs<11,"x11", [X11_W], ["a1"]>, DwarfRegNum<[11]>;
+ def X12 : RISCVRegWithSubRegs<12,"x12", [X12_W], ["a2"]>, DwarfRegNum<[12]>;
+ def X13 : RISCVRegWithSubRegs<13,"x13", [X13_W], ["a3"]>, DwarfRegNum<[13]>;
+ def X14 : RISCVRegWithSubRegs<14,"x14", [X14_W], ["a4"]>, DwarfRegNum<[14]>;
+ def X15 : RISCVRegWithSubRegs<15,"x15", [X15_W], ["a5"]>, DwarfRegNum<[15]>;
+ let CostPerUse = [0, 1] in {
+ def X16 : RISCVRegWithSubRegs<16,"x16", [X16_W], ["a6"]>, DwarfRegNum<[16]>;
+ def X17 : RISCVRegWithSubRegs<17,"x17", [X17_W], ["a7"]>, DwarfRegNum<[17]>;
+ def X18 : RISCVRegWithSubRegs<18,"x18", [X18_W], ["s2"]>, DwarfRegNum<[18]>;
+ def X19 : RISCVRegWithSubRegs<19,"x19", [X19_W], ["s3"]>, DwarfRegNum<[19]>;
+ def X20 : RISCVRegWithSubRegs<20,"x20", [X20_W], ["s4"]>, DwarfRegNum<[20]>;
+ def X21 : RISCVRegWithSubRegs<21,"x21", [X21_W], ["s5"]>, DwarfRegNum<[21]>;
+ def X22 : RISCVRegWithSubRegs<22,"x22", [X22_W], ["s6"]>, DwarfRegNum<[22]>;
+ def X23 : RISCVRegWithSubRegs<23,"x23", [X23_W], ["s7"]>, DwarfRegNum<[23]>;
+ def X24 : RISCVRegWithSubRegs<24,"x24", [X24_W], ["s8"]>, DwarfRegNum<[24]>;
+ def X25 : RISCVRegWithSubRegs<25,"x25", [X25_W], ["s9"]>, DwarfRegNum<[25]>;
+ def X26 : RISCVRegWithSubRegs<26,"x26", [X26_W], ["s10"]>, DwarfRegNum<[26]>;
+ def X27 : RISCVRegWithSubRegs<27,"x27", [X27_W], ["s11"]>, DwarfRegNum<[27]>;
+ def X28 : RISCVRegWithSubRegs<28,"x28", [X28_W], ["t3"]>, DwarfRegNum<[28]>;
+ def X29 : RISCVRegWithSubRegs<29,"x29", [X29_W], ["t4"]>, DwarfRegNum<[29]>;
+ def X30 : RISCVRegWithSubRegs<30,"x30", [X30_W], ["t5"]>, DwarfRegNum<[30]>;
+ def X31 : RISCVRegWithSubRegs<31,"x31", [X31_W], ["t6"]>, DwarfRegNum<[31]>;
}
}
}
@@ -617,9 +657,15 @@ def GPRF16 : RISCVRegisterClass<[f16], 16, (add (sequence "X%u_H", 10, 17),
def GPRF16C : RISCVRegisterClass<[f16], 16, (add (sequence "X%u_H", 10, 15),
(sequence "X%u_H", 8, 9))>;
-let RegInfos = XLenRI in {
-def GPRF32 : RISCVRegisterClass<[f32], 32, (add GPR)>;
-} // RegInfos = XLenRI
+def GPRF32 : RISCVRegisterClass<[f32], 32, (add (sequence "X%u_W", 10, 17),
+ (sequence "X%u_W", 5, 7),
+ (sequence "X%u_W", 28, 31),
+ (sequence "X%u_W", 8, 9),
+ (sequence "X%u_W", 18, 27),
+ (sequence "X%u_W", 0, 4))>;
+def GPRF32C : RISCVRegisterClass<[f32], 32, (add (sequence "X%u_W", 10, 15),
+ (sequence "X%u_W", 8, 9))>;
+def GPRF32NoX0 : RISCVRegisterClass<[f32], 32, (sub GPRF32, X0_W)>;
// Dummy zero register for use in the register pair containing X0 (as X1 is
// not read to or written when the X0 register pair is used).
diff --git a/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll b/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll
index de5bb8a30db16b..0eefc34ad552a9 100644
--- a/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll
+++ b/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll
@@ -23,26 +23,34 @@ define half @caller_half(half %x) nounwind {
;
; ZFINX32-LABEL: caller_half:
; ZFINX32: # %bb.0: # %entry
+; ZFINX32-NEXT: # kill: def $x10_w killed $x10_w def $x10
; ZFINX32-NEXT: lui a1, 1048560
; ZFINX32-NEXT: or a0, a0, a1
+; ZFINX32-NEXT: # kill: def $x10_w killed $x10_w killed $x10
; ZFINX32-NEXT: tail h
;
; ZFINX64-LABEL: caller_half:
; ZFINX64: # %bb.0: # %entry
+; ZFINX64-NEXT: # kill: def $x10_w killed $x10_w def $x10
; ZFINX64-NEXT: lui a1, 1048560
; ZFINX64-NEXT: or a0, a0, a1
+; ZFINX64-NEXT: # kill: def $x10_w killed $x10_w killed $x10
; ZFINX64-NEXT: tail h
;
; ZDINX32-LABEL: caller_half:
; ZDINX32: # %bb.0: # %entry
+; ZDINX32-NEXT: # kill: def $x10_w killed $x10_w def $x10
; ZDINX32-NEXT: lui a1, 1048560
; ZDINX32-NEXT: or a0, a0, a1
+; ZDINX32-NEXT: # kill: def $x10_w killed $x10_w killed $x10
; ZDINX32-NEXT: tail h
;
; ZDINX64-LABEL: caller_half:
; ZDINX64: # %bb.0: # %entry
+; ZDINX64-NEXT: # kill: def $x10_w killed $x10_w def $x10
; ZDINX64-NEXT: lui a1, 1048560
; ZDINX64-NEXT: or a0, a0, a1
+; ZDINX64-NEXT: # kill: def $x10_w killed $x10_w killed $x10
; ZDINX64-NEXT: tail h
entry:
%0 = tail call fastcc half @h(half %x)
@@ -60,26 +68,34 @@ define internal fastcc half @h(half %x) nounwind {
;
; ZFINX32-LABEL: h:
; ZFINX32: # %bb.0:
+; ZFINX32-NEXT: # kill: def $x10_w killed $x10_w def $x10
; ZFINX32-NEXT: lui a1, 1048560
; ZFINX32-NEXT: or a0, a0, a1
+; ZFINX32-NEXT: # kill: def $x10_w killed $x10_w killed $x10
; ZFINX32-NEXT: ret
;
; ZFINX64-LABEL: h:
; ZFINX64: # %bb.0:
+; ZFINX64-NEXT: # kill: def $x10_w killed $x10_w def $x10
; ZFINX64-NEXT: lui a1, 1048560
; ZFINX64-NEXT: or a0, a0, a1
+; ZFINX64-NEXT: # kill: def $x10_w killed $x10_w killed $x10
; ZFINX64-NEXT: ret
;
; ZDINX32-LABEL: h:
; ZDINX32: # %bb.0:
+; ZDINX32-NEXT: # kill: def $x10_w killed $x10_w def $x10
; ZDINX32-NEXT: lui a1, 1048560
; ZDINX32-NEXT: or a0, a0, a1
+; ZDINX32-NEXT: # kill: def $x10_w killed $x10_w killed $x10
; ZDINX32-NEXT: ret
;
; ZDINX64-LABEL: h:
; ZDINX64: # %bb.0:
+; ZDINX64-NEXT: # kill: def $x10_w killed $x10_w def $x10
; ZDINX64-NEXT: lui a1, 1048560
; ZDINX64-NEXT: or a0, a0, a1
+; ZDINX64-NEXT: # kill: def $x10_w killed $x10_w killed $x10
; ZDINX64-NEXT: ret
ret half %x
}
@@ -220,24 +236,28 @@ define fastcc half @callee_half_32(<32 x half> %A) nounwind {
; ZFINX32: # %bb.0:
; ZFINX32-NEXT: lui a1, 1048560
; ZFINX32-NEXT: or a0, a0, a1
+; ZFINX32-NEXT: # kill: def $x10_w killed $x10_w killed $x10
; ZFINX32-NEXT: ret
;
; ZFINX64-LABEL: callee_half_32:
; ZFINX64: # %bb.0:
; ZFINX64-NEXT: lui a1, 1048560
; ZFINX64-NEXT: or a0, a0, a1
+; ZFINX64-NEXT: # kill: def $x10_w killed $x10_w killed $x10
; ZFINX64-NEXT: ret
;
; ZDINX32-LABEL: callee_half_32:
; ZDINX32: # %bb.0:
; ZDINX32-NEXT: lui a1, 1048560
; ZDINX32-NEXT: or a0, a0, a1
+; ZDINX32-NEXT: # kill: def $x10_w killed $x10_w killed $x10
; ZDINX32-NEXT: ret
;
; ZDINX64-LABEL: callee_half_32:
; ZDINX64: # %bb.0:
; ZDINX64-NEXT: lui a1, 1048560
; ZDINX64-NEXT: or a0, a0, a1
+; ZDINX64-NEXT: # kill: def $x10_w killed $x10_w killed $x10
; ZDINX64-NEXT: ret
%B = extractelement <32 x half> %A, i32 0
ret half %B
@@ -492,8 +512,10 @@ define half @caller_half_32(<32 x half> %A) nounwind {
; ZFINX32-NEXT: lw a3, 96(sp) # 4-byte Folded Reload
; ZFINX32-NEXT: lw a4, 92(sp) # 4-byte Folded Reload
; ZFINX32-NEXT: call callee_half_32
+; ZFINX32-NEXT: # kill: def $x10_w killed $x10_w def $x10
; ZFINX32-NEXT: lui a1, 1048560
; ZFINX32-NEXT: or a0, a0, a1
+; ZFINX32-NEXT: # kill: def $x10_w killed $x10_w killed $x10
; ZFINX32-NEXT: lw ra, 156(sp) # 4-byte Folded Reload
; ZFINX32-NEXT: lw s0, 152(sp) # 4-byte Folded Reload
; ZFINX32-NEXT: lw s1, 148(sp) # 4-byte Folded Reload
@@ -588,8 +610,10 @@ define half @caller_half_32(<32 x half> %A) nounwind {
; ZFINX64-NEXT: ld a3, 176(sp) # 8-byte Folded Reload
; ZFINX64-NEXT: ld a4, 168(sp) # 8-byte Folded Reload
; ZFINX64-NEXT: call callee_half_32
+; ZFINX64-NEXT: # kill: def $x10_w killed $x10_w def $x10
; ZFINX64-NEXT: lui a1, 1048560
; ZFINX64-NEXT: or a0, a0, a1
+; ZFINX64-NEXT: # kill: def $x10_w killed $x10_w killed $x10
; ZFINX64-NEXT: ld ra, 296(sp) # 8-byte Folded Reload
; ZFINX64-NEXT: ld s0, 288(sp) # 8-byte Folded Reload
; ZFINX64-NEXT: ld s1, 280(sp) # 8-byte Folded Reload
@@ -684,8 +708,10 @@ define half @caller_half_32(<32 x half> %A) nounwind {
; ZDINX32-NEXT: lw a3, 96(sp) # 4-byte Folded Reload
; ZDINX32-NEXT: lw a4, 92(sp) # 4-byte Folded Reload
; ZDINX32-NEXT: call callee_half_32
+; ZDINX32-NEXT: # kill: def $x10_w killed $x10_w def $x10
; ZDINX32-NEXT: lui a1, 1048560
; ZDINX32-NEXT: or a0, a0, a1
+; ZDINX32-NEXT: # kill: def $x10_w killed $x10_w killed $x10
; ZDINX32-NEXT: lw ra, 156(sp) # 4-byte Folded Reload
; ZDINX32-NEXT: lw s0, 152(sp) # 4-byte Folded Reload
; ZDINX32-NEXT: lw s1, 148(sp) # 4-byte Folded Reload
@@ -780,8 +806,10 @@ define half @caller_half_32(<32 x half> %A) nounwind {
; ZDINX64-NEXT: ld a3, 176(sp) # 8-byte Folded Reload
; ZDINX64-NEXT: ld a4, 168(sp) # 8-byte Folded Reload
; ZDINX64-NEXT: call callee_half_32
+; ZDINX64-NEXT: # kill: def $x10_w killed $x10_w def $x10
; ZDINX64-NEXT: lui a1, 1048560
; ZDINX64-NEXT: or a0, a0, a1
+; ZDINX64-NEXT: # kill: def $x10_w killed $x10_w killed $x10
; ZDINX64-NEXT: ld ra, 296(sp) # 8-byte Folded Reload
; ZDINX64-NEXT: ld s0, 288(sp) # 8-byte Folded Reload
; ZDINX64-NEXT: ld s1, 280(sp) # 8-byte Folded Reload
@@ -917,79 +945,87 @@ define float @caller_float_32(<32 x float> %A) nounwind {
;
; ZHINX64-LABEL: caller_float_32:
; ZHINX64: # %bb.0:
-; ZHINX64-NEXT: addi sp, sp, -192
-; ZHINX64-NEXT: sd ra, 184(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT: sd s0, 176(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT: sd s1, 168(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT: sd s2, 160(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT: sd s3, 152(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT: sd s4, 144(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT: sd s5, 136(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT: sd s6, 128(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT: sd s7, 120(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT: sd s8, 112(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT: sd s9, 104(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT: sd s10, 96(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT: sd s11, 88(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT: addi sp, sp, -208
+; ZHINX64-NEXT: sd ra, 200(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT: sd s0, 192(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT: sd s1, 184(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT: sd s2, 176(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT: sd s3, 168(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT: sd s4, 160(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT: sd s5, 152(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT: sd s6, 144(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT: sd s7, 136(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT: sd s8, 128(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT: sd s9, 120(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT: sd s10, 112(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT: sd s11, 104(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT: lw t0, 208(sp)
+; ZHINX64-NEXT: sw t0, 100(sp) # 4-byte Folded Spill
+; ZHINX64-NEXT: lw t0, 216(sp)
+; ZHINX64-NEXT: sw t0, 96(sp) # 4-byte Folded Spill
; ZHINX64-NEXT: lw t0, 224(sp)
-; ZHINX64-NEXT: lw t1, 232(sp)
-; ZHINX64-NEXT: lw t2, 240(sp)
-; ZHINX64-NEXT: lw s0, 248(sp)
-; ZHINX64-NEXT: lw t3, 256(sp)
-; ZHINX64-NEXT: lw t4, 264(sp)
-; ZHINX64-NEXT: lw t5, 272(sp)
-; ZHINX64-NEXT: lw t6, 280(sp)
-; ZHINX64-NEXT: lw s1, 288(sp)
-; ZHINX64-NEXT: lw s2, 296(sp)
-; ZHINX64-NEXT: lw s3, 304(sp)
-; ZHINX64-NEXT: lw s4, 312(sp)
-; ZHINX64-NEXT: lw s5, 320(sp)
-; ZHINX64-NEXT: lw s6, 328(sp)
-; ZHINX64-NEXT: lw s7, 336(sp)
-; ZHINX64-NEXT: lw s8, 344(sp)
-; ZHINX64-NEXT: lw s9, 352(sp)
-; ZHINX64-NEXT: lw s10, 360(sp)
-; ZHINX64-NEXT: lw s11, 368(sp)
-; ZHINX64-NEXT: lw ra, 376(sp)
-; ZHINX64-NEXT: sw ra, 76(sp)
-; ZHINX64-NEXT: sw s11, 72(sp)
-; ZHINX64-NEXT: sw s10, 68(sp)
-; ZHINX64-NEXT: sw s9, 64(sp)
-; ZHINX64-NEXT: sw s8, 60(sp)
-; ZHINX64-NEXT: sw s7, 56(sp)
-; ZHINX64-NEXT: sw s6, 52(sp)
-; ZHINX64-NEXT: sw s5, 48(sp)
-; ZHINX64-NEXT: sw s4, 44(sp)
-; ZHINX64-NEXT: sw s3, 40(sp)
-; ZHINX64-NEXT: sw s2, 36(sp)
-; ZHINX64-NEXT: sw s1, 32(sp)
-; ZHINX64-NEXT: sw t6, 28(sp)
-; ZHINX64-NEXT: sw t5, 24(sp)
-; ZHINX64-NEXT: sw t4, 20(sp)
-; ZHINX64-NEXT: sw t3, 16(sp)
-; ZHINX64-NEXT: lw t3, 192(sp)
-; ZHINX64-NEXT: lw t4, 200(sp)
-; ZHINX64-NEXT: lw t5, 208(sp)
-; ZHINX64-NEXT: lw t6, 216(sp)
+; ZHINX64-NEXT: sw t0, 92(sp) # 4-byte Folded Spill
+; ZHINX64-NEXT: lw t0, 232(sp)
+; ZHINX64-NEXT: sw t0, 88(sp) # 4-byte Folded Spill
+; ZHINX64-NEXT: lw t6, 240(sp)
+; ZHINX64-NEXT: lw t5, 248(sp)
+; ZHINX64-NEXT: lw t4, 256(sp)
+; ZHINX64-NEXT: lw s0, 264(sp)
+; ZHINX64-NEXT: lw s1, 272(sp)
+; ZHINX64-NEXT: lw s2, 280(sp)
+; ZHINX64-NEXT: lw s3, 288(sp)
+; ZHINX64-NEXT: lw s4, 296(sp)
+; ZHINX64-NEXT: lw s5, 304(sp)
+; ZHINX64-NEXT: lw s6, 312(sp)
+; ZHINX64-NEXT: lw s7, 320(sp)
+; ZHINX64-NEXT: lw s8, 328(sp)
+; ZHINX64-NEXT: lw s9, 336(sp)
+; ZHINX64-NEXT: lw s10, 344(sp)
+; ZHINX64-NEXT: lw s11, 352(sp)
+; ZHINX64-NEXT: lw ra, 360(sp)
+; ZHINX64-NEXT: lw t3, 368(sp)
+; ZHINX64-NEXT: lw t2, 376(sp)
+; ZHINX64-NEXT: lw t1, 384(sp)
+; ZHINX64-NEXT: lw t0, 392(sp)
+; ZHINX64-NEXT: sw t0, 76(sp)
+; ZHINX64-NEXT: sw t1, 72(sp)
+; ZHINX64-NEXT: sw t2, 68(sp)
+; ZHINX64-NEXT: sw t3, 64(sp)
+; ZHINX64-NEXT: sw ra, 60(sp)
+; ZHINX64-NEXT: sw s11, 56(sp)
+; ZHINX64-NEXT: sw s10, 52(sp)
+; ZHINX64-NEXT: sw s9, 48(sp)
+; ZHINX64-NEXT: sw s8, 44(sp)
+; ZHINX64-NEXT: sw s7, 40(sp)
+; ZHINX64-NEXT: sw s6, 36(sp)
+; ZHINX64-NEXT: sw s5, 32(sp)
+; ZHINX64-NEXT: sw s4, 28(sp)
+; ZHINX64-NEXT: sw s3, 24(sp)
+; ZHINX64-NEXT: sw s2, 20(sp)
+; ZHINX64-NEXT: sw s1, 16(sp)
; ZHINX64-NEXT: sw s0, 12(sp)
-; ZHINX64-NEXT: sw t2, 8(sp)
-; ZHINX64-NEXT: sw t1, 4(sp)
-; ZHINX64-NEXT: sw t0, 0(sp)
+; ZHINX64-NEXT: sw t4, 8(sp)
+; ZHINX64-NEXT: sw t5, 4(sp)
+; ZHINX64-NEXT: sw t6, 0(sp)
+; ZHINX64-NEXT: lw t3, 100(sp) # 4-byte Folded Reload
+; ZHINX64-NEXT: lw t4, 96(sp) # 4-byte Folded Reload
+; ZHINX64-NEXT: lw t5, 92(sp) # 4-byte Folded Reload
+; ZHINX64-NEXT: lw t6, 88(sp) # 4-byte Folded Reload
; ZHINX64-NEXT: call callee_float_32
-; ZHINX64-NEXT: ld ra, 184(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT: ld s0, 176(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT: ld s1, 168(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT: ld s2, 160(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT: ld s3, 152(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT: ld s4, 144(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT: ld s5, 136(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT: ld s6, 128(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT: ld s7, 120(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT: ld s8, 112(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT: ld s9, 104(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT: ld s10, 96(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT: ld s11, 88(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT: addi sp, sp, 192
+; ZHINX64-NEXT: ld ra, 200(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT: ld s0, 192(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT: ld s1, 184(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT: ld s2, 176(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT: ld s3, 168(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT: ld s4, 160(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT: ld s5, 152(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT: ld s6, 144(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT: ld s7, 136(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT: ld s8, 128(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT: ld s9, 120(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT: ld s10, 112(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT: ld s11, 104(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT: addi sp, sp, 208
; ZHINX64-NEXT: ret
;
; ZFINX32-LABEL: caller_float_32:
@@ -1079,79 +1115,87 @@ define float @caller_float_32(<32 x float> %A) nounwind {
;
; ZFINX64-LABEL: caller_float_32:
; ZFINX64: # %bb.0:
-; ZFINX64-NEXT: addi sp, sp, -192
-; ZFINX64-NEXT: sd ra, 184(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT: sd s0, 176(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT: sd s1, 168(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT: sd s2, 160(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT: sd s3, 152(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT: sd s4, 144(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT: sd s5, 136(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT: sd s6, 128(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT: sd s7, 120(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT: sd s8, 112(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT: sd s9, 104(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT: sd s10, 96(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT: sd s11, 88(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT: addi sp, sp, -208
+; ZFINX64-NEXT: sd ra, 200(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT: sd s0, 192(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT: sd s1, 184(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT: sd s2, 176(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT: sd s3, 168(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT: sd s4, 160(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT: sd s5, 152(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT: sd s6, 144(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT: sd s7, 136(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT: sd s8, 128(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT: sd s9, 120(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT: sd s10, 112(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT: sd s11, 104(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT: lw t0, 208(sp)
+; ZFINX64-NEXT: sw t0, 100(sp) # 4-byte Folded Spill
+; ZFINX64-NEXT: lw t0, 216(sp)
+; ZFINX64-NEXT: sw t0, 96(sp) # 4-byte Folded Spill
; ZFINX64-NEXT: lw t0, 224(sp)
-; ZFINX64-NEXT: lw t1, 232(sp)
-; ZFINX64-NEXT: lw t2, 240(sp)
-; ZFINX64-NEXT: lw s0, 248(sp)
-; ZFINX64-NEXT: lw t3, 256(sp)
-; ZFINX64-NEXT: lw t4, 264(sp)
-; ZFINX64-NEXT: lw t5, 272(sp)
-; ZFINX64-NEXT: lw t6, 280(sp)
-; ZFINX64-NEXT: lw s1, 288(sp)
-; ZFINX64-NEXT: lw s2, 296(sp)
-; ZFINX64-NEXT: lw s3, 304(sp)
-; ZFINX64-NEXT: lw s4, 312(sp)
-; ZFINX64-NEXT: lw s5, 320(sp)
-; ZFINX64-NEXT: lw s6, 328(sp)
-; ZFINX64-NEXT: lw s7, 336(sp)
-; ZFINX64-NEXT: lw s8, 344(sp)
-; ZFINX64-NEXT: lw s9, 352(sp)
-; ZFINX64-NEXT: lw s10, 360(sp)
-; ZFINX64-NEXT: lw s11, 368(sp)
-; ZFINX64-NEXT: lw ra, 376(sp)
-; ZFINX64-NEXT: sw ra, 76(sp)
-; ZFINX64-NEXT: sw s11, 72(sp)
-; ZFINX64-NEXT: sw s10, 68(sp)
-; ZFINX64-NEXT: sw s9, 64(sp)
-; ZFINX64-NEXT: sw s8, 60(sp)
-; ZFINX64-NEXT: sw s7, 56(sp)
-; ZFINX64-NEXT: sw s6, 52(sp)
-; ZFINX64-NEXT: sw s5, 48(sp)
-; ZFINX64-NEXT: sw s4, 44(sp)
-; ZFINX64-NEXT: sw s3, 40(sp)
-; ZFINX64-NEXT: sw s2, 36(sp)
-; ZFINX64-NEXT: sw s1, 32(sp)
-; ZFINX64-NEXT: sw t6, 28(sp)
-; ZFINX64-NEXT: sw t5, 24(sp)
-; ZFINX64-NEXT: sw t4, 20(sp)
-; ZFINX64-NEXT: sw t3, 16(sp)
-; ZFINX64-NEXT: lw t3, 192(sp)
-; ZFINX64-NEXT: lw t4, 200(sp)
-; ZFINX64-NEXT: lw t5, 208(sp)
-; ZFINX64-NEXT: lw t6, 216(sp)
+; ZFINX64-NEXT: sw t0, 92(sp) # 4-byte Folded Spill
+; ZFINX64-NEXT: lw t0, 232(sp)
+; ZFINX64-NEXT: sw t0, 88(sp) # 4-byte Folded Spill
+; ZFINX64-NEXT: lw t6, 240(sp)
+; ZFINX64-NEXT: lw t5, 248(sp)
+; ZFINX64-NEXT: lw t4, 256(sp)
+; ZFINX64-NEXT: lw s0, 264(sp)
+; ZFINX64-NEXT: lw s1, 272(sp)
+; ZFINX64-NEXT: lw s2, 280(sp)
+; ZFINX64-NEXT: lw s3, 288(sp)
+; ZFINX64-NEXT: lw s4, 296(sp)
+; ZFINX64-NEXT: lw s5, 304(sp)
+; ZFINX64-NEXT: lw s6, 312(sp)
+; ZFINX64-NEXT: lw s7, 320(sp)
+; ZFINX64-NEXT: lw s8, 328(sp)
+; ZFINX64-NEXT: lw s9, 336(sp)
+; ZFINX64-NEXT: lw s10, 344(sp)
+; ZFINX64-NEXT: lw s11, 352(sp)
+; ZFINX64-NEXT: lw ra, 360(sp)
+; ZFINX64-NEXT: lw t3, 368(sp)
+; ZFINX64-NEXT: lw t2, 376(sp)
+; ZFINX64-NEXT: lw t1, 384(sp)
+; ZFINX64-NEXT: lw t0, 392(sp)
+; ZFINX64-NEXT: sw t0, 76(sp)
+; ZFINX64-NEXT: sw t1, 72(sp)
+; ZFINX64-NEXT: sw t2, 68(sp)
+; ZFINX64-NEXT: sw t3, 64(sp)
+; ZFINX64-NEXT: sw ra, 60(sp)
+; ZFINX64-NEXT: sw s11, 56(sp)
+; ZFINX64-NEXT: sw s10, 52(sp)
+; ZFINX64-NEXT: sw s9, 48(sp)
+; ZFINX64-NEXT: sw s8, 44(sp)
+; ZFINX64-NEXT: sw s7, 40(sp)
+; ZFINX64-NEXT: sw s6, 36(sp)
+; ZFINX64-NEXT: sw s5, 32(sp)
+; ZFINX64-NEXT: sw s4, 28(sp)
+; ZFINX64-NEXT: sw s3, 24(sp)
+; ZFINX64-NEXT: sw s2, 20(sp)
+; ZFINX64-NEXT: sw s1, 16(sp)
; ZFINX64-NEXT: sw s0, 12(sp)
-; ZFINX64-NEXT: sw t2, 8(sp)
-; ZFINX64-NEXT: sw t1, 4(sp)
-; ZFINX64-NEXT: sw t0, 0(sp)
+; ZFINX64-NEXT: sw t4, 8(sp)
+; ZFINX64-NEXT: sw t5, 4(sp)
+; ZFINX64-NEXT: sw t6, 0(sp)
+; ZFINX64-NEXT: lw t3, 100(sp) # 4-byte Folded Reload
+; ZFINX64-NEXT: lw t4, 96(sp) # 4-byte Folded Reload
+; ZFINX64-NEXT: lw t5, 92(sp) # 4-byte Folded Reload
+; ZFINX64-NEXT: lw t6, 88(sp) # 4-byte Folded Reload
; ZFINX64-NEXT: call callee_float_32
-; ZFINX64-NEXT: ld ra, 184(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT: ld s0, 176(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT: ld s1, 168(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT: ld s2, 160(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT: ld s3, 152(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT: ld s4, 144(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT: ld s5, 136(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT: ld s6, 128(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT: ld s7, 120(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT: ld s8, 112(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT: ld s9, 104(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT: ld s10, 96(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT: ld s11, 88(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT: addi sp, sp, 192
+; ZFINX64-NEXT: ld ra, 200(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT: ld s0, 192(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT: ld s1, 184(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT: ld s2, 176(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT: ld s3, 168(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT: ld s4, 160(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT: ld s5, 152(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT: ld s6, 144(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT: ld s7, 136(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT: ld s8, 128(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT: ld s9, 120(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT: ld s10, 112(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT: ld s11, 104(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT: addi sp, sp, 208
; ZFINX64-NEXT: ret
;
; ZDINX32-LABEL: caller_float_32:
@@ -1241,79 +1285,87 @@ define float @caller_float_32(<32 x float> %A) nounwind {
;
; ZDINX64-LABEL: caller_float_32:
; ZDINX64: # %bb.0:
-; ZDINX64-NEXT: addi sp, sp, -192
-; ZDINX64-NEXT: sd ra, 184(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT: sd s0, 176(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT: sd s1, 168(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT: sd s2, 160(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT: sd s3, 152(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT: sd s4, 144(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT: sd s5, 136(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT: sd s6, 128(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT: sd s7, 120(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT: sd s8, 112(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT: sd s9, 104(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT: sd s10, 96(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT: sd s11, 88(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT: addi sp, sp, -208
+; ZDINX64-NEXT: sd ra, 200(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT: sd s0, 192(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT: sd s1, 184(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT: sd s2, 176(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT: sd s3, 168(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT: sd s4, 160(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT: sd s5, 152(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT: sd s6, 144(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT: sd s7, 136(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT: sd s8, 128(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT: sd s9, 120(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT: sd s10, 112(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT: sd s11, 104(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT: lw t0, 208(sp)
+; ZDINX64-NEXT: sw t0, 100(sp) # 4-byte Folded Spill
+; ZDINX64-NEXT: lw t0, 216(sp)
+; ZDINX64-NEXT: sw t0, 96(sp) # 4-byte Folded Spill
; ZDINX64-NEXT: lw t0, 224(sp)
-; ZDINX64-NEXT: lw t1, 232(sp)
-; ZDINX64-NEXT: lw t2, 240(sp)
-; ZDINX64-NEXT: lw s0, 248(sp)
-; ZDINX64-NEXT: lw t3, 256(sp)
-; ZDINX64-NEXT: lw t4, 264(sp)
-; ZDINX64-NEXT: lw t5, 272(sp)
-; ZDINX64-NEXT: lw t6, 280(sp)
-; ZDINX64-NEXT: lw s1, 288(sp)
-; ZDINX64-NEXT: lw s2, 296(sp)
-; ZDINX64-NEXT: lw s3, 304(sp)
-; ZDINX64-NEXT: lw s4, 312(sp)
-; ZDINX64-NEXT: lw s5, 320(sp)
-; ZDINX64-NEXT: lw s6, 328(sp)
-; ZDINX64-NEXT: lw s7, 336(sp)
-; ZDINX64-NEXT: lw s8, 344(sp)
-; ZDINX64-NEXT: lw s9, 352(sp)
-; ZDINX64-NEXT: lw s10, 360(sp)
-; ZDINX64-NEXT: lw s11, 368(sp)
-; ZDINX64-NEXT: lw ra, 376(sp)
-; ZDINX64-NEXT: sw ra, 76(sp)
-; ZDINX64-NEXT: sw s11, 72(sp)
-; ZDINX64-NEXT: sw s10, 68(sp)
-; ZDINX64-NEXT: sw s9, 64(sp)
-; ZDINX64-NEXT: sw s8, 60(sp)
-; ZDINX64-NEXT: sw s7, 56(sp)
-; ZDINX64-NEXT: sw s6, 52(sp)
-; ZDINX64-NEXT: sw s5, 48(sp)
-; ZDINX64-NEXT: sw s4, 44(sp)
-; ZDINX64-NEXT: sw s3, 40(sp)
-; ZDINX64-NEXT: sw s2, 36(sp)
-; ZDINX64-NEXT: sw s1, 32(sp)
-; ZDINX64-NEXT: sw t6, 28(sp)
-; ZDINX64-NEXT: sw t5, 24(sp)
-; ZDINX64-NEXT: sw t4, 20(sp)
-; ZDINX64-NEXT: sw t3, 16(sp)
-; ZDINX64-NEXT: lw t3, 192(sp)
-; ZDINX64-NEXT: lw t4, 200(sp)
-; ZDINX64-NEXT: lw t5, 208(sp)
-; ZDINX64-NEXT: lw t6, 216(sp)
+; ZDINX64-NEXT: sw t0, 92(sp) # 4-byte Folded Spill
+; ZDINX64-NEXT: lw t0, 232(sp)
+; ZDINX64-NEXT: sw t0, 88(sp) # 4-byte Folded Spill
+; ZDINX64-NEXT: lw t6, 240(sp)
+; ZDINX64-NEXT: lw t5, 248(sp)
+; ZDINX64-NEXT: lw t4, 256(sp)
+; ZDINX64-NEXT: lw s0, 264(sp)
+; ZDINX64-NEXT: lw s1, 272(sp)
+; ZDINX64-NEXT: lw s2, 280(sp)
+; ZDINX64-NEXT: lw s3, 288(sp)
+; ZDINX64-NEXT: lw s4, 296(sp)
+; ZDINX64-NEXT: lw s5, 304(sp)
+; ZDINX64-NEXT: lw s6, 312(sp)
+; ZDINX64-NEXT: lw s7, 320(sp)
+; ZDINX64-NEXT: lw s8, 328(sp)
+; ZDINX64-NEXT: lw s9, 336(sp)
+; ZDINX64-NEXT: lw s10, 344(sp)
+; ZDINX64-NEXT: lw s11, 352(sp)
+; ZDINX64-NEXT: lw ra, 360(sp)
+; ZDINX64-NEXT: lw t3, 368(sp)
+; ZDINX64-NEXT: lw t2, 376(sp)
+; ZDINX64-NEXT: lw t1, 384(sp)
+; ZDINX64-NEXT: lw t0, 392(sp)
+; ZDINX64-NEXT: sw t0, 76(sp)
+; ZDINX64-NEXT: sw t1, 72(sp)
+; ZDINX64-NEXT: sw t2, 68(sp)
+; ZDINX64-NEXT: sw t3, 64(sp)
+; ZDINX64-NEXT: sw ra, 60(sp)
+; ZDINX64-NEXT: sw s11, 56(sp)
+; ZDINX64-NEXT: sw s10, 52(sp)
+; ZDINX64-NEXT: sw s9, 48(sp)
+; ZDINX64-NEXT: sw s8, 44(sp)
+; ZDINX64-NEXT: sw s7, 40(sp)
+; ZDINX64-NEXT: sw s6, 36(sp)
+; ZDINX64-NEXT: sw s5, 32(sp)
+; ZDINX64-NEXT: sw s4, 28(sp)
+; ZDINX64-NEXT: sw s3, 24(sp)
+; ZDINX64-NEXT: sw s2, 20(sp)
+; ZDINX64-NEXT: sw s1, 16(sp)
; ZDINX64-NEXT: sw s0, 12(sp)
-; ZDINX64-NEXT: sw t2, 8(sp)
-; ZDINX64-NEXT: sw t1, 4(sp)
-; ZDINX64-NEXT: sw t0, 0(sp)
+; ZDINX64-NEXT: sw t4, 8(sp)
+; ZDINX64-NEXT: sw t5, 4(sp)
+; ZDINX64-NEXT: sw t6, 0(sp)
+; ZDINX64-NEXT: lw t3, 100(sp) # 4-byte Folded Reload
+; ZDINX64-NEXT: lw t4, 96(sp) # 4-byte Folded Reload
+; ZDINX64-NEXT: lw t5, 92(sp) # 4-byte Folded Reload
+; ZDINX64-NEXT: lw t6, 88(sp) # 4-byte Folded Reload
; ZDINX64-NEXT: call callee_float_32
-; ZDINX64-NEXT: ld ra, 184(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT: ld s0, 176(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT: ld s1, 168(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT: ld s2, 160(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT: ld s3, 152(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT: ld s4, 144(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT: ld s5, 136(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT: ld s6, 128(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT: ld s7, 120(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT: ld s8, 112(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT: ld s9, 104(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT: ld s10, 96(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT: ld s11, 88(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT: addi sp, sp, 192
+; ZDINX64-NEXT: ld ra, 200(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT: ld s0, 192(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT: ld s1, 184(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT: ld s2, 176(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT: ld s3, 168(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT: ld s4, 160(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT: ld s5, 152(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT: ld s6, 144(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT: ld s7, 136(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT: ld s8, 128(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT: ld s9, 120(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT: ld s10, 112(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT: ld s11, 104(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT: addi sp, sp, 208
; ZDINX64-NEXT: ret
%C = call fastcc float @callee_float_32(<32 x float> %A)
ret float %C
diff --git a/llvm/test/CodeGen/RISCV/float-arith.ll b/llvm/test/CodeGen/RISCV/float-arith.ll
index 3f32734db0ba71..bf500d1a2adb39 100644
--- a/llvm/test/CodeGen/RISCV/float-arith.ll
+++ b/llvm/test/CodeGen/RISCV/float-arith.ll
@@ -706,18 +706,11 @@ define float @fnmadd_s_3(float %a, float %b, float %c) nounwind {
; CHECKIF-NEXT: fneg.s fa0, fa5
; CHECKIF-NEXT: ret
;
-; RV32IZFINX-LABEL: fnmadd_s_3:
-; RV32IZFINX: # %bb.0:
-; RV32IZFINX-NEXT: fmadd.s a0, a0, a1, a2
-; RV32IZFINX-NEXT: fneg.s a0, a0
-; RV32IZFINX-NEXT: ret
-;
-; RV64IZFINX-LABEL: fnmadd_s_3:
-; RV64IZFINX: # %bb.0:
-; RV64IZFINX-NEXT: fmadd.s a0, a0, a1, a2
-; RV64IZFINX-NEXT: lui a1, 524288
-; RV64IZFINX-NEXT: xor a0, a0, a1
-; RV64IZFINX-NEXT: ret
+; CHECKIZFINX-LABEL: fnmadd_s_3:
+; CHECKIZFINX: # %bb.0:
+; CHECKIZFINX-NEXT: fmadd.s a0, a0, a1, a2
+; CHECKIZFINX-NEXT: fneg.s a0, a0
+; CHECKIZFINX-NEXT: ret
;
; RV32I-LABEL: fnmadd_s_3:
; RV32I: # %bb.0:
@@ -761,17 +754,10 @@ define float @fnmadd_nsz(float %a, float %b, float %c) nounwind {
; CHECKIF-NEXT: fnmadd.s fa0, fa0, fa1, fa2
; CHECKIF-NEXT: ret
;
-; RV32IZFINX-LABEL: fnmadd_nsz:
-; RV32IZFINX: # %bb.0:
-; RV32IZFINX-NEXT: fnmadd.s a0, a0, a1, a2
-; RV32IZFINX-NEXT: ret
-;
-; RV64IZFINX-LABEL: fnmadd_nsz:
-; RV64IZFINX: # %bb.0:
-; RV64IZFINX-NEXT: fmadd.s a0, a0, a1, a2
-; RV64IZFINX-NEXT: lui a1, 524288
-; RV64IZFINX-NEXT: xor a0, a0, a1
-; RV64IZFINX-NEXT: ret
+; CHECKIZFINX-LABEL: fnmadd_nsz:
+; CHECKIZFINX: # %bb.0:
+; CHECKIZFINX-NEXT: fnmadd.s a0, a0, a1, a2
+; CHECKIZFINX-NEXT: ret
;
; RV32I-LABEL: fnmadd_nsz:
; RV32I: # %bb.0:
@@ -1247,3 +1233,6 @@ define float @fsgnjx_f32(float %x, float %y) nounwind {
%mul = fmul float %z, %y
ret float %mul
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; RV32IZFINX: {{.*}}
+; RV64IZFINX: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/float-bitmanip-dagcombines.ll b/llvm/test/CodeGen/RISCV/float-bitmanip-dagcombines.ll
index 2338219687ef75..86f6f079243c26 100644
--- a/llvm/test/CodeGen/RISCV/float-bitmanip-dagcombines.ll
+++ b/llvm/test/CodeGen/RISCV/float-bitmanip-dagcombines.ll
@@ -50,8 +50,7 @@ define float @fneg(float %a) nounwind {
;
; RV64IZFINX-LABEL: fneg:
; RV64IZFINX: # %bb.0:
-; RV64IZFINX-NEXT: lui a1, 524288
-; RV64IZFINX-NEXT: xor a0, a0, a1
+; RV64IZFINX-NEXT: fneg.s a0, a0
; RV64IZFINX-NEXT: ret
%1 = fneg float %a
ret float %1
@@ -91,8 +90,7 @@ define float @fabs(float %a) nounwind {
;
; RV64IZFINX-LABEL: fabs:
; RV64IZFINX: # %bb.0:
-; RV64IZFINX-NEXT: slli a0, a0, 33
-; RV64IZFINX-NEXT: srli a0, a0, 33
+; RV64IZFINX-NEXT: fabs.s a0, a0
; RV64IZFINX-NEXT: ret
%1 = call float @llvm.fabs.f32(float %a)
ret float %1
diff --git a/llvm/test/CodeGen/RISCV/float-frem.ll b/llvm/test/CodeGen/RISCV/float-frem.ll
index 651b1b116adc76..31d39a5ab6d6ea 100644
--- a/llvm/test/CodeGen/RISCV/float-frem.ll
+++ b/llvm/test/CodeGen/RISCV/float-frem.ll
@@ -27,12 +27,7 @@ define float @frem_f32(float %a, float %b) nounwind {
;
; RV64IZFINX-LABEL: frem_f32:
; RV64IZFINX: # %bb.0:
-; RV64IZFINX-NEXT: addi sp, sp, -16
-; RV64IZFINX-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64IZFINX-NEXT: call fmodf
-; RV64IZFINX-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64IZFINX-NEXT: addi sp, sp, 16
-; RV64IZFINX-NEXT: ret
+; RV64IZFINX-NEXT: tail fmodf
;
; RV32I-LABEL: frem_f32:
; RV32I: # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/float-imm.ll b/llvm/test/CodeGen/RISCV/float-imm.ll
index 69a506cd850f2c..58cbc72e2197c9 100644
--- a/llvm/test/CodeGen/RISCV/float-imm.ll
+++ b/llvm/test/CodeGen/RISCV/float-imm.ll
@@ -20,12 +20,14 @@ define float @float_imm() nounwind {
; RV32ZFINX: # %bb.0:
; RV32ZFINX-NEXT: lui a0, 263313
; RV32ZFINX-NEXT: addi a0, a0, -37
+; RV32ZFINX-NEXT: # kill: def $x10_w killed $x10_w killed $x10
; RV32ZFINX-NEXT: ret
;
; RV64ZFINX-LABEL: float_imm:
; RV64ZFINX: # %bb.0:
; RV64ZFINX-NEXT: lui a0, 263313
; RV64ZFINX-NEXT: addiw a0, a0, -37
+; RV64ZFINX-NEXT: # kill: def $x10_w killed $x10_w killed $x10
; RV64ZFINX-NEXT: ret
ret float 3.14159274101257324218750
}
diff --git a/llvm/test/CodeGen/RISCV/float-intrinsics.ll b/llvm/test/CodeGen/RISCV/float-intrinsics.ll
index 52442026dab502..b05eac9c9dee26 100644
--- a/llvm/test/CodeGen/RISCV/float-intrinsics.ll
+++ b/llvm/test/CodeGen/RISCV/float-intrinsics.ll
@@ -136,12 +136,7 @@ define float @sin_f32(float %a) nounwind {
;
; RV64IZFINX-LABEL: sin_f32:
; RV64IZFINX: # %bb.0:
-; RV64IZFINX-NEXT: addi sp, sp, -16
-; RV64IZFINX-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64IZFINX-NEXT: call sinf
-; RV64IZFINX-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64IZFINX-NEXT: addi sp, sp, 16
-; RV64IZFINX-NEXT: ret
+; RV64IZFINX-NEXT: tail sinf
;
; RV32I-LABEL: sin_f32:
; RV32I: # %bb.0:
@@ -181,12 +176,7 @@ define float @cos_f32(float %a) nounwind {
;
; RV64IZFINX-LABEL: cos_f32:
; RV64IZFINX: # %bb.0:
-; RV64IZFINX-NEXT: addi sp, sp, -16
-; RV64IZFINX-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64IZFINX-NEXT: call cosf
-; RV64IZFINX-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64IZFINX-NEXT: addi sp, sp, 16
-; RV64IZFINX-NEXT: ret
+; RV64IZFINX-NEXT: tail cosf
;
; RV32I-LABEL: cos_f32:
; RV32I: # %bb.0:
@@ -327,12 +317,7 @@ define float @pow_f32(float %a, float %b) nounwind {
;
; RV64IZFINX-LABEL: pow_f32:
; RV64IZFINX: # %bb.0:
-; RV64IZFINX-NEXT: addi sp, sp, -16
-; RV64IZFINX-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64IZFINX-NEXT: call powf
-; RV64IZFINX-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64IZFINX-NEXT: addi sp, sp, 16
-; RV64IZFINX-NEXT: ret
+; RV64IZFINX-NEXT: tail powf
;
; RV32I-LABEL: pow_f32:
; RV32I: # %bb.0:
@@ -372,12 +357,7 @@ define float @exp_f32(float %a) nounwind {
;
; RV64IZFINX-LABEL: exp_f32:
; RV64IZFINX: # %bb.0:
-; RV64IZFINX-NEXT: addi sp, sp, -16
-; RV64IZFINX-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64IZFINX-NEXT: call expf
-; RV64IZFINX-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64IZFINX-NEXT: addi sp, sp, 16
-; RV64IZFINX-NEXT: ret
+; RV64IZFINX-NEXT: tail expf
;
; RV32I-LABEL: exp_f32:
; RV32I: # %bb.0:
@@ -417,12 +397,7 @@ define float @exp2_f32(float %a) nounwind {
;
; RV64IZFINX-LABEL: exp2_f32:
; RV64IZFINX: # %bb.0:
-; RV64IZFINX-NEXT: addi sp, sp, -16
-; RV64IZFINX-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64IZFINX-NEXT: call exp2f
-; RV64IZFINX-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64IZFINX-NEXT: addi sp, sp, 16
-; RV64IZFINX-NEXT: ret
+; RV64IZFINX-NEXT: tail exp2f
;
; RV32I-LABEL: exp2_f32:
; RV32I: # %bb.0:
@@ -462,12 +437,7 @@ define float @log_f32(float %a) nounwind {
;
; RV64IZFINX-LABEL: log_f32:
; RV64IZFINX: # %bb.0:
-; RV64IZFINX-NEXT: addi sp, sp, -16
-; RV64IZFINX-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64IZFINX-NEXT: call logf
-; RV64IZFINX-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64IZFINX-NEXT: addi sp, sp, 16
-; RV64IZFINX-NEXT: ret
+; RV64IZFINX-NEXT: tail logf
;
; RV32I-LABEL: log_f32:
; RV32I: # %bb.0:
@@ -507,12 +477,7 @@ define float @log10_f32(float %a) nounwind {
;
; RV64IZFINX-LABEL: log10_f32:
; RV64IZFINX: # %bb.0:
-; RV64IZFINX-NEXT: addi sp, sp, -16
-; RV64IZFINX-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64IZFINX-NEXT: call log10f
-; RV64IZFINX-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64IZFINX-NEXT: addi sp, sp, 16
-; RV64IZFINX-NEXT: ret
+; RV64IZFINX-NEXT: tail log10f
;
; RV32I-LABEL: log10_f32:
; RV32I: # %bb.0:
@@ -552,12 +517,7 @@ define float @log2_f32(float %a) nounwind {
;
; RV64IZFINX-LABEL: log2_f32:
; RV64IZFINX: # %bb.0:
-; RV64IZFINX-NEXT: addi sp, sp, -16
-; RV64IZFINX-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64IZFINX-NEXT: call log2f
-; RV64IZFINX-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64IZFINX-NEXT: addi sp, sp, 16
-; RV64IZFINX-NEXT: ret
+; RV64IZFINX-NEXT: tail log2f
;
; RV32I-LABEL: log2_f32:
; RV32I: # %bb.0:
@@ -698,8 +658,7 @@ define float @fabs_f32(float %a) nounwind {
;
; RV64IZFINX-LABEL: fabs_f32:
; RV64IZFINX: # %bb.0:
-; RV64IZFINX-NEXT: slli a0, a0, 33
-; RV64IZFINX-NEXT: srli a0, a0, 33
+; RV64IZFINX-NEXT: fabs.s a0, a0
; RV64IZFINX-NEXT: ret
;
; RV32I-LABEL: fabs_f32:
@@ -1195,12 +1154,7 @@ define float @nearbyint_f32(float %a) nounwind {
;
; RV64IZFINX-LABEL: nearbyint_f32:
; RV64IZFINX: # %bb.0:
-; RV64IZFINX-NEXT: addi sp, sp, -16
-; RV64IZFINX-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64IZFINX-NEXT: call nearbyintf
-; RV64IZFINX-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64IZFINX-NEXT: addi sp, sp, 16
-; RV64IZFINX-NEXT: ret
+; RV64IZFINX-NEXT: tail nearbyintf
;
; RV32I-LABEL: nearbyint_f32:
; RV32I: # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/half-intrinsics.ll b/llvm/test/CodeGen/RISCV/half-intrinsics.ll
index 81e29329e71817..18cdb18106f343 100644
--- a/llvm/test/CodeGen/RISCV/half-intrinsics.ll
+++ b/llvm/test/CodeGen/RISCV/half-intrinsics.ll
@@ -153,8 +153,8 @@ define half @powi_f16(half %a, i32 %b) nounwind {
; RV64IZHINX: # %bb.0:
; RV64IZHINX-NEXT: addi sp, sp, -16
; RV64IZHINX-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64IZHINX-NEXT: sext.w a1, a1
; RV64IZHINX-NEXT: fcvt.s.h a0, a0
+; RV64IZHINX-NEXT: sext.w a1, a1
; RV64IZHINX-NEXT: call __powisf2
; RV64IZHINX-NEXT: fcvt.h.s a0, a0
; RV64IZHINX-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
@@ -233,8 +233,8 @@ define half @powi_f16(half %a, i32 %b) nounwind {
; RV64IZHINXMIN: # %bb.0:
; RV64IZHINXMIN-NEXT: addi sp, sp, -16
; RV64IZHINXMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64IZHINXMIN-NEXT: sext.w a1, a1
; RV64IZHINXMIN-NEXT: fcvt.s.h a0, a0
+; RV64IZHINXMIN-NEXT: sext.w a1, a1
; RV64IZHINXMIN-NEXT: call __powisf2
; RV64IZHINXMIN-NEXT: fcvt.h.s a0, a0
; RV64IZHINXMIN-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/kcfi-mir.ll b/llvm/test/CodeGen/RISCV/kcfi-mir.ll
index e478930d59abc5..f35be0564cb25f 100644
--- a/llvm/test/CodeGen/RISCV/kcfi-mir.ll
+++ b/llvm/test/CodeGen/RISCV/kcfi-mir.ll
@@ -10,7 +10,7 @@ define void @f1(ptr noundef %x) !kcfi_type !1 {
; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 16
; CHECK-NEXT: SD killed $x1, $x2, 8 :: (store (s64) into %stack.0)
; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $x1, -8
- ; CHECK-NEXT: BUNDLE implicit-def $x6, implicit-def $x6_h, implicit-def $x7, implicit-def $x7_h, implicit-def $x28, implicit-def $x28_h, implicit-def $x29, implicit-def $x29_h, implicit-def $x30, implicit-def $x30_h, implicit-def $x31, implicit-def $x31_h, implicit-def dead $x1, implicit-def $x2, implicit-def $x2_h, implicit killed $x10 {
+ ; CHECK-NEXT: BUNDLE implicit-def $x6, implicit-def $x6_w, implicit-def $x6_h, implicit-def $x7, implicit-def $x7_w, implicit-def $x7_h, implicit-def $x28, implicit-def $x28_w, implicit-def $x28_h, implicit-def $x29, implicit-def $x29_w, implicit-def $x29_h, implicit-def $x30, implicit-def $x30_w, implicit-def $x30_h, implicit-def $x31, implicit-def $x31_w, implicit-def $x31_h, implicit-def dead $x1, implicit-def $x2, implicit-def $x2_w, implicit-def $x2_h, implicit killed $x10 {
; CHECK-NEXT: KCFI_CHECK $x10, 12345678, implicit-def $x6, implicit-def $x7, implicit-def $x28, implicit-def $x29, implicit-def $x30, implicit-def $x31
; CHECK-NEXT: PseudoCALLIndirect killed $x10, csr_ilp32_lp64, implicit-def dead $x1, implicit-def $x2
; CHECK-NEXT: }
@@ -26,7 +26,7 @@ define void @f2(ptr noundef %x) #0 {
; CHECK: bb.0 (%ir-block.0):
; CHECK-NEXT: liveins: $x10
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: BUNDLE implicit-def $x6, implicit-def $x6_h, implicit-def $x7, implicit-def $x7_h, implicit-def $x28, implicit-def $x28_h, implicit-def $x29, implicit-def $x29_h, implicit-def $x30, implicit-def $x30_h, implicit-def $x31, implicit-def $x31_h, implicit killed $x10, implicit $x2 {
+ ; CHECK-NEXT: BUNDLE implicit-def $x6, implicit-def $x6_w, implicit-def $x6_h, implicit-def $x7, implicit-def $x7_w, implicit-def $x7_h, implicit-def $x28, implicit-def $x28_w, implicit-def $x28_h, implicit-def $x29, implicit-def $x29_w, implicit-def $x29_h, implicit-def $x30, implicit-def $x30_w, implicit-def $x30_h, implicit-def $x31, implicit-def $x31_w, implicit-def $x31_h, implicit killed $x10, implicit $x2 {
; CHECK-NEXT: KCFI_CHECK $x10, 12345678, implicit-def $x6, implicit-def $x7, implicit-def $x28, implicit-def $x29, implicit-def $x30, implicit-def $x31
; CHECK-NEXT: PseudoTAILIndirect killed $x10, implicit $x2
; CHECK-NEXT: }
diff --git a/llvm/test/CodeGen/RISCV/llvm.frexp.ll b/llvm/test/CodeGen/RISCV/llvm.frexp.ll
index 442b0cf5b4a856..2c9d640e03a634 100644
--- a/llvm/test/CodeGen/RISCV/llvm.frexp.ll
+++ b/llvm/test/CodeGen/RISCV/llvm.frexp.ll
@@ -62,8 +62,10 @@ define { half, i32 } @test_frexp_f16_i32(half %a) nounwind {
; RV32IZFINXZDINX-NEXT: call frexpf
; RV32IZFINXZDINX-NEXT: call __truncsfhf2
; RV32IZFINXZDINX-NEXT: lw a1, 8(sp)
+; RV32IZFINXZDINX-NEXT: # kill: def $x10_w killed $x10_w def $x10
; RV32IZFINXZDINX-NEXT: lui a2, 1048560
; RV32IZFINXZDINX-NEXT: or a0, a0, a2
+; RV32IZFINXZDINX-NEXT: # kill: def $x10_w killed $x10_w killed $x10
; RV32IZFINXZDINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: addi sp, sp, 16
; RV32IZFINXZDINX-NEXT: ret
@@ -77,8 +79,10 @@ define { half, i32 } @test_frexp_f16_i32(half %a) nounwind {
; RV64IZFINXZDINX-NEXT: call frexpf
; RV64IZFINXZDINX-NEXT: call __truncsfhf2
; RV64IZFINXZDINX-NEXT: ld a1, 0(sp)
+; RV64IZFINXZDINX-NEXT: # kill: def $x10_w killed $x10_w def $x10
; RV64IZFINXZDINX-NEXT: lui a2, 1048560
; RV64IZFINXZDINX-NEXT: or a0, a0, a2
+; RV64IZFINXZDINX-NEXT: # kill: def $x10_w killed $x10_w killed $x10
; RV64IZFINXZDINX-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64IZFINXZDINX-NEXT: addi sp, sp, 16
; RV64IZFINXZDINX-NEXT: ret
@@ -157,8 +161,10 @@ define half @test_frexp_f16_i32_only_use_fract(half %a) nounwind {
; RV32IZFINXZDINX-NEXT: addi a1, sp, 8
; RV32IZFINXZDINX-NEXT: call frexpf
; RV32IZFINXZDINX-NEXT: call __truncsfhf2
+; RV32IZFINXZDINX-NEXT: # kill: def $x10_w killed $x10_w def $x10
; RV32IZFINXZDINX-NEXT: lui a1, 1048560
; RV32IZFINXZDINX-NEXT: or a0, a0, a1
+; RV32IZFINXZDINX-NEXT: # kill: def $x10_w killed $x10_w killed $x10
; RV32IZFINXZDINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: addi sp, sp, 16
; RV32IZFINXZDINX-NEXT: ret
@@ -171,8 +177,10 @@ define half @test_frexp_f16_i32_only_use_fract(half %a) nounwind {
; RV64IZFINXZDINX-NEXT: mv a1, sp
; RV64IZFINXZDINX-NEXT: call frexpf
; RV64IZFINXZDINX-NEXT: call __truncsfhf2
+; RV64IZFINXZDINX-NEXT: # kill: def $x10_w killed $x10_w def $x10
; RV64IZFINXZDINX-NEXT: lui a1, 1048560
; RV64IZFINXZDINX-NEXT: or a0, a0, a1
+; RV64IZFINXZDINX-NEXT: # kill: def $x10_w killed $x10_w killed $x10
; RV64IZFINXZDINX-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64IZFINXZDINX-NEXT: addi sp, sp, 16
; RV64IZFINXZDINX-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/make-compressible-zfinx.mir b/llvm/test/CodeGen/RISCV/make-compressible-zfinx.mir
new file mode 100644
index 00000000000000..d0223dc5911ad3
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/make-compressible-zfinx.mir
@@ -0,0 +1,296 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -o - %s -mtriple=riscv32 -mattr=+c,+zfinx -simplify-mir \
+# RUN: -run-pass=riscv-make-compressible | FileCheck --check-prefixes=CHECK %s
+# RUN: llc -o - %s -mtriple=riscv64 -mattr=+c,+zfinx -simplify-mir \
+# RUN: -run-pass=riscv-make-compressible | FileCheck --check-prefixes=CHECK %s
+
+--- |
+
+ define void @store_common_value_float(ptr %a, ptr %b, ptr %c, float %d, float %e, float %f, float %g, float %h, float %i, float %j) #0 {
+ entry:
+ store float %j, ptr %a, align 4
+ store float %j, ptr %b, align 4
+ store float %j, ptr %c, align 4
+ ret void
+ }
+
+ define void @store_common_ptr_float(float %a, float %b, float %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, ptr %p) #0 {
+ entry:
+ store volatile float %a, ptr %p, align 4
+ store volatile float %b, ptr %p, align 4
+ store volatile float %c, ptr %p, align 4
+ ret void
+ }
+
+ define void @load_common_ptr_float(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, ptr %g) #0 {
+ entry:
+ %0 = load float, ptr %g, align 4
+ %arrayidx1 = getelementptr inbounds float, ptr %g, i32 1
+ %1 = load float, ptr %arrayidx1, align 4
+ %arrayidx2 = getelementptr inbounds float, ptr %g, i32 2
+ %2 = load float, ptr %arrayidx2, align 4
+ tail call void @load_common_ptr_float_1(float %0, float %1, float %2)
+ ret void
+ }
+
+ declare void @load_common_ptr_float_1(float, float, float) #0
+
+ define void @store_large_offset_float(ptr %p, float %a, float %b, float %c, float %d) #0 {
+ entry:
+ %0 = getelementptr inbounds float, ptr %p, i32 100
+ store volatile float %a, ptr %0, align 4
+ %1 = getelementptr inbounds float, ptr %p, i32 101
+ store volatile float %b, ptr %1, align 4
+ %2 = getelementptr inbounds float, ptr %p, i32 102
+ store volatile float %c, ptr %2, align 4
+ %3 = getelementptr inbounds float, ptr %p, i32 103
+ store volatile float %d, ptr %3, align 4
+ ret void
+ }
+
+ define void @load_large_offset_float(ptr %p) #0 {
+ entry:
+ %arrayidx = getelementptr inbounds float, ptr %p, i32 100
+ %0 = load float, ptr %arrayidx, align 4
+ %arrayidx1 = getelementptr inbounds float, ptr %p, i32 101
+ %1 = load float, ptr %arrayidx1, align 4
+ %arrayidx2 = getelementptr inbounds float, ptr %p, i32 102
+ %2 = load float, ptr %arrayidx2, align 4
+ tail call void @load_large_offset_float_1(float %0, float %1, float %2)
+ ret void
+ }
+
+ declare void @load_large_offset_float_1(float, float, float) #0
+
+ define void @store_common_value_float_no_opt(ptr %a, float %b, float %c, float %d, float %e, float %f, float %g, float %h) #0 {
+ entry:
+ store float %h, ptr %a, align 4
+ ret void
+ }
+
+ define void @store_common_ptr_float_no_opt(float %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, ptr %p) #0 {
+ entry:
+ store volatile float %a, ptr %p, align 4
+ ret void
+ }
+
+ define float @load_common_ptr_float_no_opt(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, ptr %g) #0 {
+ entry:
+ %0 = load float, ptr %g, align 4
+ ret float %0
+ }
+
+ define void @store_large_offset_float_no_opt(ptr %p, float %a, float %b) #0 {
+ entry:
+ %0 = getelementptr inbounds float, ptr %p, i32 100
+ store volatile float %a, ptr %0, align 4
+ %1 = getelementptr inbounds float, ptr %p, i32 101
+ store volatile float %b, ptr %1, align 4
+ ret void
+ }
+
+ define { float, float } @load_large_offset_float_no_opt(ptr %p) #0 {
+ entry:
+ %arrayidx = getelementptr inbounds float, ptr %p, i32 100
+ %0 = load float, ptr %arrayidx, align 4
+ %arrayidx1 = getelementptr inbounds float, ptr %p, i32 101
+ %1 = load float, ptr %arrayidx1, align 4
+ %2 = insertvalue { float, float } undef, float %0, 0
+ %3 = insertvalue { float, float } %2, float %1, 1
+ ret { float, float } %3
+ }
+
+ attributes #0 = { minsize }
+
+...
+---
+name: store_common_value_float
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11, $x12
+
+ ; CHECK-LABEL: name: store_common_value_float
+ ; CHECK: liveins: $x10, $x11, $x12
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $x13_w = PseudoMV_FPR32INX $x0_w
+ ; CHECK-NEXT: SW_INX $x13_w, killed renamable $x10, 0 :: (store (s32) into %ir.a)
+ ; CHECK-NEXT: SW_INX $x13_w, killed renamable $x11, 0 :: (store (s32) into %ir.b)
+ ; CHECK-NEXT: SW_INX killed $x13_w, killed renamable $x12, 0 :: (store (s32) into %ir.c)
+ ; CHECK-NEXT: PseudoRET
+ SW_INX $x0_w, killed renamable $x10, 0 :: (store (s32) into %ir.a)
+ SW_INX $x0_w, killed renamable $x11, 0 :: (store (s32) into %ir.b)
+ SW_INX killed $x0_w, killed renamable $x12, 0 :: (store (s32) into %ir.c)
+ PseudoRET
+
+...
+---
+name: store_common_ptr_float
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10_w, $x11_w, $x12_w, $x16
+
+ ; CHECK-LABEL: name: store_common_ptr_float
+ ; CHECK: liveins: $x10_w, $x11_w, $x12_w, $x16
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $x13 = ADDI $x16, 0
+ ; CHECK-NEXT: SW_INX killed renamable $x10_w, $x13, 0 :: (volatile store (s32) into %ir.p)
+ ; CHECK-NEXT: SW_INX killed renamable $x11_w, $x13, 0 :: (volatile store (s32) into %ir.p)
+ ; CHECK-NEXT: SW_INX killed renamable $x12_w, killed $x13, 0 :: (volatile store (s32) into %ir.p)
+ ; CHECK-NEXT: PseudoRET
+ SW_INX killed renamable $x10_w, renamable $x16, 0 :: (volatile store (s32) into %ir.p)
+ SW_INX killed renamable $x11_w, renamable $x16, 0 :: (volatile store (s32) into %ir.p)
+ SW_INX killed renamable $x12_w, killed renamable $x16, 0 :: (volatile store (s32) into %ir.p)
+ PseudoRET
+
+...
+---
+name: load_common_ptr_float
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x16
+
+ ; CHECK-LABEL: name: load_common_ptr_float
+ ; CHECK: liveins: $x16
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $x13 = ADDI $x16, 0
+ ; CHECK-NEXT: renamable $x10_w = LW_INX $x13, 0 :: (load (s32) from %ir.g)
+ ; CHECK-NEXT: renamable $x11_w = LW_INX $x13, 4 :: (load (s32) from %ir.arrayidx1)
+ ; CHECK-NEXT: renamable $x12_w = LW_INX killed $x13, 8 :: (load (s32) from %ir.arrayidx2)
+ ; CHECK-NEXT: PseudoTAIL target-flags(riscv-call) @load_common_ptr_float_1, implicit $x2, implicit $x10_w, implicit $x11_w, implicit $x12_w
+ renamable $x10_w = LW_INX renamable $x16, 0 :: (load (s32) from %ir.g)
+ renamable $x11_w = LW_INX renamable $x16, 4 :: (load (s32) from %ir.arrayidx1)
+ renamable $x12_w = LW_INX killed renamable $x16, 8 :: (load (s32) from %ir.arrayidx2)
+ PseudoTAIL target-flags(riscv-call) @load_common_ptr_float_1, implicit $x2, implicit $x10_w, implicit $x11_w, implicit $x12_w
+
+...
+---
+name: store_large_offset_float
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11_w, $x11_w, $x12_w, $x13_w
+
+ ; CHECK-LABEL: name: store_large_offset_float
+ ; CHECK: liveins: $x10, $x11_w, $x11_w, $x12_w, $x13_w
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $x14 = ADDI $x10, 384
+ ; CHECK-NEXT: SW_INX killed renamable $x10_w, $x14, 16 :: (volatile store (s32) into %ir.0)
+ ; CHECK-NEXT: SW_INX killed renamable $x11_w, $x14, 20 :: (volatile store (s32) into %ir.1)
+ ; CHECK-NEXT: SW_INX killed renamable $x12_w, $x14, 24 :: (volatile store (s32) into %ir.2)
+ ; CHECK-NEXT: SW_INX killed renamable $x13_w, killed $x14, 28 :: (volatile store (s32) into %ir.3)
+ ; CHECK-NEXT: PseudoRET
+ SW_INX killed renamable $x10_w, renamable $x10, 400 :: (volatile store (s32) into %ir.0)
+ SW_INX killed renamable $x11_w, renamable $x10, 404 :: (volatile store (s32) into %ir.1)
+ SW_INX killed renamable $x12_w, renamable $x10, 408 :: (volatile store (s32) into %ir.2)
+ SW_INX killed renamable $x13_w, killed renamable $x10, 412 :: (volatile store (s32) into %ir.3)
+ PseudoRET
+
+...
+---
+name: load_large_offset_float
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10
+
+ ; CHECK-LABEL: name: load_large_offset_float
+ ; CHECK: liveins: $x10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $x14 = ADDI $x10, 384
+ ; CHECK-NEXT: renamable $x11_w = LW_INX $x14, 16 :: (load (s32) from %ir.arrayidx)
+ ; CHECK-NEXT: renamable $x12_w = LW_INX $x14, 20 :: (load (s32) from %ir.arrayidx1)
+ ; CHECK-NEXT: renamable $x13_w = LW_INX killed $x14, 24 :: (load (s32) from %ir.arrayidx2)
+ ; CHECK-NEXT: PseudoTAIL target-flags(riscv-call) @load_large_offset_float_1, implicit $x2, implicit $x11_w, implicit $x12_w, implicit $x12_w
+ renamable $x11_w = LW_INX renamable $x10, 400 :: (load (s32) from %ir.arrayidx)
+ renamable $x12_w = LW_INX renamable $x10, 404 :: (load (s32) from %ir.arrayidx1)
+ renamable $x13_w = LW_INX killed renamable $x10, 408 :: (load (s32) from %ir.arrayidx2)
+ PseudoTAIL target-flags(riscv-call) @load_large_offset_float_1, implicit $x2, implicit $x11_w, implicit $x12_w, implicit $x12_w
+
+...
+---
+name: store_common_value_float_no_opt
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x16_w
+
+ ; CHECK-LABEL: name: store_common_value_float_no_opt
+ ; CHECK: liveins: $x10, $x16_w
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: SW_INX killed renamable $x16_w, killed renamable $x10, 0 :: (store (s32) into %ir.a)
+ ; CHECK-NEXT: PseudoRET
+ SW_INX killed renamable $x16_w, killed renamable $x10, 0 :: (store (s32) into %ir.a)
+ PseudoRET
+
+...
+---
+name: store_common_ptr_float_no_opt
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x16, $x10_w
+
+ ; CHECK-LABEL: name: store_common_ptr_float_no_opt
+ ; CHECK: liveins: $x16, $x10_w
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: SW_INX killed renamable $x10_w, killed renamable $x16, 0 :: (volatile store (s32) into %ir.p)
+ ; CHECK-NEXT: PseudoRET
+ SW_INX killed renamable $x10_w, killed renamable $x16, 0 :: (volatile store (s32) into %ir.p)
+ PseudoRET
+
+...
+---
+name: load_common_ptr_float_no_opt
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x16
+
+ ; CHECK-LABEL: name: load_common_ptr_float_no_opt
+ ; CHECK: liveins: $x16
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $x10_w = LW_INX killed renamable $x16, 0 :: (load (s32) from %ir.g)
+ ; CHECK-NEXT: PseudoRET implicit $x10_w
+ renamable $x10_w = LW_INX killed renamable $x16, 0 :: (load (s32) from %ir.g)
+ PseudoRET implicit $x10_w
+
+...
+---
+name: store_large_offset_float_no_opt
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11_w, $x12_w
+
+ ; CHECK-LABEL: name: store_large_offset_float_no_opt
+ ; CHECK: liveins: $x10, $x11_w, $x12_w
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: SW_INX killed renamable $x11_w, renamable $x10, 400 :: (volatile store (s32) into %ir.0)
+ ; CHECK-NEXT: SW_INX killed renamable $x12_w, killed renamable $x10, 404 :: (volatile store (s32) into %ir.1)
+ ; CHECK-NEXT: PseudoRET
+ SW_INX killed renamable $x11_w, renamable $x10, 400 :: (volatile store (s32) into %ir.0)
+ SW_INX killed renamable $x12_w, killed renamable $x10, 404 :: (volatile store (s32) into %ir.1)
+ PseudoRET
+
+...
+---
+name: load_large_offset_float_no_opt
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10
+
+ ; CHECK-LABEL: name: load_large_offset_float_no_opt
+ ; CHECK: liveins: $x10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $x11_w = LW_INX renamable $x10, 400 :: (load (s32) from %ir.arrayidx)
+ ; CHECK-NEXT: renamable $x12_w = LW_INX killed renamable $x10, 404 :: (load (s32) from %ir.arrayidx1)
+ ; CHECK-NEXT: PseudoRET implicit $x11_w, implicit $x12_w
+ renamable $x11_w = LW_INX renamable $x10, 400 :: (load (s32) from %ir.arrayidx)
+ renamable $x12_w = LW_INX killed renamable $x10, 404 :: (load (s32) from %ir.arrayidx1)
+ PseudoRET implicit $x11_w, implicit $x12_w
+
+...