[clang] [llvm] [RISCV] Add MIPS extensions (PR #121394)
Djordje Todorovic via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 31 06:34:34 PST 2024
https://github.com/djtodoro created https://github.com/llvm/llvm-project/pull/121394
Adding two extensions for the MIPS p8700 CPU:
1. cmove (conditional move)
2. lsp (load/store pair)
The official product page is here:
https://mips.com/products/hardware/p8700
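For reference, here is roughly how the new functionality is exercised once the
patch is applied (illustrative invocations only; the flags and features are the
ones this patch adds or uses):

  # Clang driver: enable load/store pairing on the p8700. ccmov is on by
  # default when the CPU has xmipscmove and can be disabled with -mno-ccmov.
  clang --target=riscv64 -mcpu=mips-p8700 -mload-store-pairs -O2 -c test.c

  # llc equivalent, mirroring the RUN lines of the new tests:
  llc -mtriple=riscv64 -mcpu=mips-p8700 -mattr=+Xmipslsp -riscv-load-store-pairs=1 test.ll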
From a2e1fd5ffab4fe64a160571a9dffaacc3f22c32d Mon Sep 17 00:00:00 2001
From: Djordje Todorovic <djordje.todorovic at htecgroup.com>
Date: Thu, 26 Dec 2024 09:09:24 +0100
Subject: [PATCH] [RISCV] Add MIPS extensions
Adding two extensions for the MIPS p8700 CPU:
1. cmove (conditional move)
2. lsp (load/store pair)
The official product page is here:
https://mips.com/products/hardware/p8700
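For reviewers, an illustrative sketch of the new assembly syntax, derived from
the .td asm strings and the autogenerated tests below (register choices are
arbitrary):

  ccmov a0, a1, a2, a3    # a0 = (a1 != 0) ? a2 : a3
  lwp   s3, s2, 0(a0)     # s3 = mem[a0+0],  s2 = mem[a0+4]
  ldp   s3, s2, 16(sp)    # s3 = mem[sp+16], s2 = mem[sp+24]
  swp   s3, s2, 8(sp)     # mem[sp+8]  = s3, mem[sp+12] = s2
  sdp   s3, s2, 0(sp)     # mem[sp+0]  = s3, mem[sp+8]  = s2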
---
clang/include/clang/Driver/Options.td | 4 +
clang/lib/Driver/ToolChains/Clang.cpp | 15 +
llvm/docs/RISCVUsage.rst | 6 +
.../Target/RISCV/AsmParser/RISCVAsmParser.cpp | 10 +
llvm/lib/Target/RISCV/CMakeLists.txt | 1 +
.../Target/RISCV/MCTargetDesc/RISCVBaseInfo.h | 1 +
llvm/lib/Target/RISCV/RISCV.h | 2 +
llvm/lib/Target/RISCV/RISCVFeatures.td | 13 +
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 4 +-
llvm/lib/Target/RISCV/RISCVInstrFormats.td | 72 +++
llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 43 ++
llvm/lib/Target/RISCV/RISCVInstrInfo.h | 6 +
llvm/lib/Target/RISCV/RISCVInstrInfo.td | 145 +++++
llvm/lib/Target/RISCV/RISCVInstrInfoC.td | 125 -----
llvm/lib/Target/RISCV/RISCVInstrInfoXMips.td | 82 +++
.../Target/RISCV/RISCVLoadStoreOptimizer.cpp | 370 +++++++++++++
llvm/lib/Target/RISCV/RISCVProcessors.td | 4 +-
llvm/lib/Target/RISCV/RISCVSubtarget.cpp | 18 +
llvm/lib/Target/RISCV/RISCVSubtarget.h | 2 +
llvm/lib/Target/RISCV/RISCVTargetMachine.cpp | 15 +
llvm/test/CodeGen/RISCV/O0-pipeline.ll | 1 +
llvm/test/CodeGen/RISCV/O3-pipeline.ll | 2 +
llvm/test/CodeGen/RISCV/load-store-pair.ll | 509 ++++++++++++++++++
llvm/test/CodeGen/RISCV/select-and.ll | 25 +
llvm/test/CodeGen/RISCV/select-bare.ll | 14 +
llvm/test/CodeGen/RISCV/select-cc.ll | 86 +++
llvm/test/CodeGen/RISCV/select-or.ll | 25 +
27 files changed, 1473 insertions(+), 127 deletions(-)
create mode 100644 llvm/lib/Target/RISCV/RISCVInstrInfoXMips.td
create mode 100644 llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp
create mode 100644 llvm/test/CodeGen/RISCV/load-store-pair.ll
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index d922709db17786..28b7d70f77d4a2 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -4963,6 +4963,10 @@ def msave_restore : Flag<["-"], "msave-restore">, Group<m_riscv_Features_Group>,
def mno_save_restore : Flag<["-"], "mno-save-restore">, Group<m_riscv_Features_Group>,
HelpText<"Disable using library calls for save and restore">;
} // let Flags = [TargetSpecific]
+def mload_store_pairs : Flag<["-"], "mload-store-pairs">, Group<m_riscv_Features_Group>;
+def mno_load_store_pairs : Flag<["-"], "mno-load-store-pairs">, Group<m_riscv_Features_Group>;
+def mccmov : Flag<["-"], "mccmov">, Group<m_riscv_Features_Group>;
+def mno_ccmov : Flag<["-"], "mno-ccmov">, Group<m_riscv_Features_Group>;
let Flags = [TargetSpecific] in {
def menable_experimental_extensions : Flag<["-"], "menable-experimental-extensions">, Group<m_Group>,
HelpText<"Enable use of experimental RISC-V extensions.">;
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index a020e00cd17392..e91df8838032cd 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -2167,6 +2167,21 @@ void Clang::AddRISCVTargetArgs(const ArgList &Args,
CmdArgs.push_back(A->getValue());
}
+ if (Arg *A = Args.getLastArg(options::OPT_mload_store_pairs,
+ options::OPT_mno_load_store_pairs)) {
+ if (A->getOption().matches(options::OPT_mload_store_pairs)) {
+ CmdArgs.push_back("-mllvm");
+ CmdArgs.push_back("-riscv-load-store-pairs=1");
+ }
+ }
+
+ if (Arg *A = Args.getLastArg(options::OPT_mccmov,
+ options::OPT_mno_ccmov)) {
+ if (A->getOption().matches(options::OPT_mno_ccmov)) {
+ CmdArgs.push_back("-mllvm");
+ CmdArgs.push_back("-riscv-ccmov=0");
+ }
+ }
// Handle -mrvv-vector-bits=<bits>
if (Arg *A = Args.getLastArg(options::OPT_mrvv_vector_bits_EQ)) {
StringRef Val = A->getValue();
diff --git a/llvm/docs/RISCVUsage.rst b/llvm/docs/RISCVUsage.rst
index 22600f5720553e..06b32a69cef9ea 100644
--- a/llvm/docs/RISCVUsage.rst
+++ b/llvm/docs/RISCVUsage.rst
@@ -444,6 +444,12 @@ The current vendor extensions supported are:
``experimental-Xqcisls``
LLVM implements `version 0.2 of the Qualcomm uC Scaled Load Store extension specification <https://github.com/quic/riscv-unified-db/releases/latest>`__ by Qualcomm. All instructions are prefixed with `qc.` as described in the specification. These instructions are only available for riscv32.
+``Xmipscmove``
+ LLVM implements conditional move for the `p8700 processor <https://mips.com/products/hardware/p8700/>`__ by MIPS.
+
+``Xmipslsp``
+ LLVM implements load/store pair instructions for the `p8700 processor <https://mips.com/products/hardware/p8700/>`__ by MIPS.
+
Experimental C Intrinsics
=========================
diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index 4c1fd5aa41e2b7..76eb5254a19a34 100644
--- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -876,6 +876,16 @@ struct RISCVOperand final : public MCParsedAsmOperand {
VK == RISCVMCExpr::VK_RISCV_None;
}
+ bool isUImm7Lsb000() const {
+ if (!isImm())
+ return false;
+ int64_t Imm;
+ RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
+ bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
+ return IsConstantImm && isShiftedUInt<4, 3>(Imm) &&
+ VK == RISCVMCExpr::VK_RISCV_None;
+ }
+
bool isUImm8Lsb00() const {
if (!isImm())
return false;
diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt
index 44661647a86310..cc9bf5727cbdf5 100644
--- a/llvm/lib/Target/RISCV/CMakeLists.txt
+++ b/llvm/lib/Target/RISCV/CMakeLists.txt
@@ -47,6 +47,7 @@ add_llvm_target(RISCVCodeGen
RISCVISelLowering.cpp
RISCVLandingPadSetup.cpp
RISCVMachineFunctionInfo.cpp
+ RISCVLoadStoreOptimizer.cpp
RISCVMergeBaseOffset.cpp
RISCVOptWInstrs.cpp
RISCVPostRAExpandPseudoInsts.cpp
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
index 7fb5fc7a831308..f51a9205cbd460 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
@@ -308,6 +308,7 @@ enum OperandType : unsigned {
OPERAND_UIMM6_LSB0,
OPERAND_UIMM7,
OPERAND_UIMM7_LSB00,
+ OPERAND_UIMM7_LSB000,
OPERAND_UIMM8_LSB00,
OPERAND_UIMM8,
OPERAND_UIMM8_LSB000,
diff --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.h
index d7bab601d545cc..b1aee98739e852 100644
--- a/llvm/lib/Target/RISCV/RISCV.h
+++ b/llvm/lib/Target/RISCV/RISCV.h
@@ -84,6 +84,8 @@ void initializeRISCVMoveMergePass(PassRegistry &);
FunctionPass *createRISCVPushPopOptimizationPass();
void initializeRISCVPushPopOptPass(PassRegistry &);
+FunctionPass *createRISCVLoadStoreOptPass();
+void initializeRISCVLoadStoreOptPass(PassRegistry &);
FunctionPass *createRISCVZacasABIFixPass();
void initializeRISCVZacasABIFixPass(PassRegistry &);
diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td
index 916b140c5bde75..9ba2c92cc90d1a 100644
--- a/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -1395,6 +1395,19 @@ def NoConditionalMoveFusion : Predicate<"!Subtarget->hasConditionalMoveFusion()
def TuneMIPSP8700
: SubtargetFeature<"mips-p8700", "RISCVProcFamily", "MIPSP8700",
"MIPS p8700 processor">;
+def FeatureVendorMIPSCMove : SubtargetFeature<"xmipscmove", "HasVendorMIPSCMove",
+ "true", "Using CCMov",
+ [Feature64Bit]>;
+def HasVendorMIPSCMove
+ : Predicate<"Subtarget->useCCMovInsn()">,
+ AssemblerPredicate<(all_of FeatureVendorMIPSCMove), "'ccmov' instruction">;
+def FeatureVendorMIPSLoadStorePairs
+ : SubtargetFeature<"xmipslsp", "HasMIPSLSP", "true",
+ "Optimize for hardware load-store bonding">;
+def HasVendorMIPSLoadStorePairs
+ : Predicate<"Subtarget->useLoadStorePairs()">,
+ AssemblerPredicate<(all_of FeatureVendorMIPSLoadStorePairs),
+ "load and store pair instructions">;
def TuneSiFive7 : SubtargetFeature<"sifive7", "RISCVProcFamily", "SiFive7",
"SiFive 7-Series processors">;
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index cda64ae5f498d3..88dd9f0ec4f18c 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -409,7 +409,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::ABS, MVT::i32, Custom);
}
- if (!Subtarget.hasVendorXTHeadCondMov())
+ if (Subtarget.hasVendorMIPSCMove())
+ setOperationAction(ISD::SELECT, XLenVT, Legal);
+ else if (!Subtarget.hasVendorXTHeadCondMov())
setOperationAction(ISD::SELECT, XLenVT, Custom);
static const unsigned FPLegalNodeTypes[] = {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrFormats.td b/llvm/lib/Target/RISCV/RISCVInstrFormats.td
index 013c26c72bfd55..9ffed2c80ad6d3 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrFormats.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrFormats.td
@@ -514,6 +514,78 @@ class RVInstJ<RISCVOpcode opcode, dag outs, dag ins, string opcodestr,
let Inst{6-0} = opcode.Value;
}
+//===----------------------------------------------------------------------===//
+// MIPS custom instruction formats
+//===----------------------------------------------------------------------===//
+
+// Load double pair format.
+class LDPFormat<dag outs, dag ins, string opcodestr, string argstr>
+ : RVInst<outs, ins, opcodestr, argstr, [], InstFormatI> {
+ bits<7> imm7;
+ bits<5> rs1;
+ bits<5> rd1;
+ bits<5> rd2;
+
+ let Inst{31-27} = rd2;
+ let Inst{26-23} = imm7{6-3};
+ let Inst{22-20} = 0b000;
+ let Inst{19-15} = rs1;
+ let Inst{14-12} = 0b100;
+ let Inst{11-7} = rd1;
+ let Inst{6-0} = 0b0001011;
+}
+
+// Load word pair format.
+class LWPFormat<dag outs, dag ins, string opcodestr, string argstr>
+ : RVInst<outs, ins, opcodestr, argstr, [], InstFormatI> {
+ bits<7> imm7;
+ bits<5> rs1;
+ bits<5> rd1;
+ bits<5> rd2;
+
+ let Inst{31-27} = rd2;
+ let Inst{26-22} = imm7{6-2};
+ let Inst{21-20} = 0b01;
+ let Inst{19-15} = rs1;
+ let Inst{14-12} = 0b100;
+ let Inst{11-7} = rd1;
+ let Inst{6-0} = 0b0001011;
+}
+
+// Store double pair format.
+class SDPFormat<dag outs, dag ins, string opcodestr, string argstr>
+ : RVInst<outs, ins, opcodestr, argstr, [], InstFormatI> {
+ bits<7> imm7;
+ bits<5> rs3;
+ bits<5> rs2;
+ bits<5> rs1;
+
+ let Inst{31-27} = rs3;
+ let Inst{26-25} = imm7{6-5};
+ let Inst{24-20} = rs2;
+ let Inst{19-15} = rs1;
+ let Inst{14-12} = 0b101;
+ let Inst{11-10} = imm7{4-3};
+ let Inst{9-0} = 0b0000001011;
+}
+
+// Store word pair format.
+class SWPFormat<dag outs, dag ins, string opcodestr, string argstr>
+ : RVInst<outs, ins, opcodestr, argstr, [], InstFormatI> {
+ bits<7> imm7;
+ bits<5> rs3;
+ bits<5> rs2;
+ bits<5> rs1;
+
+ let Inst{31-27} = rs3;
+ let Inst{26-25} = imm7{6-5};
+ let Inst{24-20} = rs2;
+ let Inst{19-15} = rs1;
+ let Inst{14-12} = 0b101;
+ let Inst{11-9} = imm7{4-2};
+ let Inst{8-0} = 0b010001011;
+}
+
//===----------------------------------------------------------------------===//
// Instruction classes for .insn directives
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index f24940795e433f..c92c8e8077c7a3 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -2488,6 +2488,9 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI,
case RISCVOp::OPERAND_UIMM7_LSB00:
Ok = isShiftedUInt<5, 2>(Imm);
break;
+ case RISCVOp::OPERAND_UIMM7_LSB000:
+ Ok = isShiftedUInt<4, 3>(Imm);
+ break;
case RISCVOp::OPERAND_UIMM8_LSB00:
Ok = isShiftedUInt<6, 2>(Imm);
break;
@@ -2734,6 +2737,46 @@ MachineInstr *RISCVInstrInfo::emitLdStWithAddr(MachineInstr &MemI,
.setMIFlags(MemI.getFlags());
}
+bool RISCVInstrInfo::isPairableLdStInstOpc(unsigned Opc) {
+ switch (Opc) {
+ default:
+ return false;
+ case RISCV::SH:
+ case RISCV::LH:
+ case RISCV::LHU:
+ case RISCV::SW:
+ case RISCV::FSW:
+ case RISCV::LW:
+ case RISCV::FLW:
+ case RISCV::SD:
+ case RISCV::FSD:
+ case RISCV::LD:
+ case RISCV::FLD:
+ return true;
+ }
+}
+
+bool RISCVInstrInfo::isLdStSafeToPair(const MachineInstr &LdSt,
+ const TargetRegisterInfo *TRI) {
+ // If this is a volatile load/store, don't mess with it.
+ if (LdSt.hasOrderedMemoryRef() || LdSt.getNumExplicitOperands() != 3)
+ return false;
+
+ if (LdSt.getOperand(1).isFI())
+ return true;
+
+ assert(LdSt.getOperand(1).isReg() && "Expected a reg operand.");
+ // Can't cluster if the instruction modifies the base register
+ // or it is update form. e.g. ld x5,8(x5)
+ if (LdSt.modifiesRegister(LdSt.getOperand(1).getReg(), TRI))
+ return false;
+
+ if (!LdSt.getOperand(2).isImm())
+ return false;
+
+ return true;
+}
+
bool RISCVInstrInfo::getMemOperandsWithOffsetWidth(
const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
index 7e8bcd451a8ef8..ef81c2d4397f26 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -301,6 +301,12 @@ class RISCVInstrInfo : public RISCVGenInstrInfo {
std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override;
+ /// Return true if the given load or store opcode may be paired with another.
+ static bool isPairableLdStInstOpc(unsigned Opc);
+
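+ /// Return true if the given load or store can safely be considered for
+ /// pairing: not volatile/ordered, and addressed via a frame index or an
+ /// unmodified base register plus an immediate offset.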
+ static bool isLdStSafeToPair(const MachineInstr &LdSt,
+ const TargetRegisterInfo *TRI);
+
protected:
const RISCVSubtarget &STI;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index 1260f99ad9dcd0..3e29e77aa3db6f 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -257,6 +257,146 @@ def simm12 : RISCVSImmLeafOp<12> {
}];
}
+// A 7-bit unsigned immediate where the least significant two bits are zero.
+def uimm7_lsb00 : RISCVOp,
+ ImmLeaf<XLenVT, [{return isShiftedUInt<5, 2>(Imm);}]> {
+ let ParserMatchClass = UImmAsmOperand<7, "Lsb00">;
+ let EncoderMethod = "getImmOpValue";
+ let DecoderMethod = "decodeUImmOperand<7>";
+ let OperandType = "OPERAND_UIMM7_LSB00";
+ let MCOperandPredicate = [{
+ int64_t Imm;
+ if (!MCOp.evaluateAsConstantImm(Imm))
+ return false;
+ return isShiftedUInt<5, 2>(Imm);
+ }];
+}
+
+// A 7-bit unsigned immediate where the least significant three bits are zero.
+def uimm7_lsb000 : RISCVOp,
+ ImmLeaf<XLenVT, [{return isShiftedUInt<4, 3>(Imm);}]> {
+ let ParserMatchClass = UImmAsmOperand<7, "Lsb000">;
+ let EncoderMethod = "getImmOpValue";
+ let DecoderMethod = "decodeUImmOperand<7>";
+ let OperandType = "OPERAND_UIMM7_LSB000";
+ let MCOperandPredicate = [{
+ int64_t Imm;
+ if (!MCOp.evaluateAsConstantImm(Imm))
+ return false;
+ return isShiftedUInt<4, 3>(Imm);
+ }];
+}
+
+// An 8-bit unsigned immediate where the least significant two bits are zero.
+def uimm8_lsb00 : RISCVOp,
+ ImmLeaf<XLenVT, [{return isShiftedUInt<6, 2>(Imm);}]> {
+ let ParserMatchClass = UImmAsmOperand<8, "Lsb00">;
+ let EncoderMethod = "getImmOpValue";
+ let DecoderMethod = "decodeUImmOperand<8>";
+ let OperandType = "OPERAND_UIMM8_LSB00";
+ let MCOperandPredicate = [{
+ int64_t Imm;
+ if (!MCOp.evaluateAsConstantImm(Imm))
+ return false;
+ return isShiftedUInt<6, 2>(Imm);
+ }];
+}
+
+// An 8-bit unsigned immediate where the least significant three bits are zero.
+def uimm8_lsb000 : RISCVOp,
+ ImmLeaf<XLenVT, [{return isShiftedUInt<5, 3>(Imm);}]> {
+ let ParserMatchClass = UImmAsmOperand<8, "Lsb000">;
+ let EncoderMethod = "getImmOpValue";
+ let DecoderMethod = "decodeUImmOperand<8>";
+ let OperandType = "OPERAND_UIMM8_LSB000";
+ let MCOperandPredicate = [{
+ int64_t Imm;
+ if (!MCOp.evaluateAsConstantImm(Imm))
+ return false;
+ return isShiftedUInt<5, 3>(Imm);
+ }];
+}
+
+// A 9-bit signed immediate where the least significant bit is zero.
+def simm9_lsb0 : Operand<OtherVT>,
+ ImmLeaf<XLenVT, [{return isShiftedInt<8, 1>(Imm);}]> {
+ let ParserMatchClass = SImmAsmOperand<9, "Lsb0">;
+ let PrintMethod = "printBranchOperand";
+ let EncoderMethod = "getImmOpValueAsr1";
+ let DecoderMethod = "decodeSImmOperandAndLsl1<9>";
+ let MCOperandPredicate = [{
+ int64_t Imm;
+ if (MCOp.evaluateAsConstantImm(Imm))
+ return isShiftedInt<8, 1>(Imm);
+ return MCOp.isBareSymbolRef();
+ }];
+ let OperandType = "OPERAND_PCREL";
+}
+
+// A 9-bit unsigned immediate where the least significant three bits are zero.
+def uimm9_lsb000 : RISCVOp,
+ ImmLeaf<XLenVT, [{return isShiftedUInt<6, 3>(Imm);}]> {
+ let ParserMatchClass = UImmAsmOperand<9, "Lsb000">;
+ let EncoderMethod = "getImmOpValue";
+ let DecoderMethod = "decodeUImmOperand<9>";
+ let OperandType = "OPERAND_UIMM9_LSB000";
+ let MCOperandPredicate = [{
+ int64_t Imm;
+ if (!MCOp.evaluateAsConstantImm(Imm))
+ return false;
+ return isShiftedUInt<6, 3>(Imm);
+ }];
+}
+
+// A 10-bit unsigned immediate where the least significant two bits are zero
+// and the immediate can't be zero.
+def uimm10_lsb00nonzero : RISCVOp,
+ ImmLeaf<XLenVT,
+ [{return isShiftedUInt<8, 2>(Imm) && (Imm != 0);}]> {
+ let ParserMatchClass = UImmAsmOperand<10, "Lsb00NonZero">;
+ let EncoderMethod = "getImmOpValue";
+ let DecoderMethod = "decodeUImmNonZeroOperand<10>";
+ let OperandType = "OPERAND_UIMM10_LSB00_NONZERO";
+ let MCOperandPredicate = [{
+ int64_t Imm;
+ if (!MCOp.evaluateAsConstantImm(Imm))
+ return false;
+ return isShiftedUInt<8, 2>(Imm) && (Imm != 0);
+ }];
+}
+
+// A 10-bit signed immediate where the least significant four bits are zero.
+def simm10_lsb0000nonzero : RISCVOp,
+ ImmLeaf<XLenVT,
+ [{return (Imm != 0) && isShiftedInt<6, 4>(Imm);}]> {
+ let ParserMatchClass = SImmAsmOperand<10, "Lsb0000NonZero">;
+ let EncoderMethod = "getImmOpValue";
+ let DecoderMethod = "decodeSImmNonZeroOperand<10>";
+ let OperandType = "OPERAND_SIMM10_LSB0000_NONZERO";
+ let MCOperandPredicate = [{
+ int64_t Imm;
+ if (!MCOp.evaluateAsConstantImm(Imm))
+ return false;
+ return isShiftedInt<6, 4>(Imm) && (Imm != 0);
+ }];
+}
+
+// A 12-bit signed immediate where the least significant bit is zero.
+def simm12_lsb0 : Operand<XLenVT>,
+ ImmLeaf<XLenVT, [{return isShiftedInt<11, 1>(Imm);}]> {
+ let ParserMatchClass = SImmAsmOperand<12, "Lsb0">;
+ let PrintMethod = "printBranchOperand";
+ let EncoderMethod = "getImmOpValueAsr1";
+ let DecoderMethod = "decodeSImmOperandAndLsl1<12>";
+ let MCOperandPredicate = [{
+ int64_t Imm;
+ if (MCOp.evaluateAsConstantImm(Imm))
+ return isShiftedInt<11, 1>(Imm);
+ return MCOp.isBareSymbolRef();
+ }];
+ let OperandType = "OPERAND_PCREL";
+}
+
// A 12-bit signed immediate which cannot fit in 6-bit signed immediate,
// but even negative value fit in 12-bit.
def simm12_no6 : ImmLeaf<XLenVT, [{
@@ -394,6 +534,10 @@ def ixlenimm_li_restricted : Operand<XLenVT> {
// Standalone (codegen-only) immleaf patterns.
+// A 12-bit signed immediate plus one, i.e. an immediate in the range [-2047, 2048].
+def simm12_plus1 : ImmLeaf<XLenVT,
+ [{return (isInt<12>(Imm) && Imm != -2048) || Imm == 2048;}]>;
+
// A 6-bit constant greater than 32.
def uimm6gt32 : ImmLeaf<XLenVT, [{
return isUInt<6>(Imm) && Imm > 32;
@@ -2119,6 +2263,7 @@ include "RISCVInstrInfoSFB.td"
include "RISCVInstrInfoXCV.td"
include "RISCVInstrInfoXwch.td"
include "RISCVInstrInfoXqci.td"
+include "RISCVInstrInfoXMips.td"
//===----------------------------------------------------------------------===//
// Global ISel
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
index ce994206cd785b..84ecb95212d3ae 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
@@ -94,131 +94,6 @@ def c_lui_imm : RISCVOp,
}];
}
-// A 7-bit unsigned immediate where the least significant two bits are zero.
-def uimm7_lsb00 : RISCVOp,
- ImmLeaf<XLenVT, [{return isShiftedUInt<5, 2>(Imm);}]> {
- let ParserMatchClass = UImmAsmOperand<7, "Lsb00">;
- let EncoderMethod = "getImmOpValue";
- let DecoderMethod = "decodeUImmOperand<7>";
- let OperandType = "OPERAND_UIMM7_LSB00";
- let MCOperandPredicate = [{
- int64_t Imm;
- if (!MCOp.evaluateAsConstantImm(Imm))
- return false;
- return isShiftedUInt<5, 2>(Imm);
- }];
-}
-
-// A 8-bit unsigned immediate where the least significant two bits are zero.
-def uimm8_lsb00 : RISCVOp,
- ImmLeaf<XLenVT, [{return isShiftedUInt<6, 2>(Imm);}]> {
- let ParserMatchClass = UImmAsmOperand<8, "Lsb00">;
- let EncoderMethod = "getImmOpValue";
- let DecoderMethod = "decodeUImmOperand<8>";
- let OperandType = "OPERAND_UIMM8_LSB00";
- let MCOperandPredicate = [{
- int64_t Imm;
- if (!MCOp.evaluateAsConstantImm(Imm))
- return false;
- return isShiftedUInt<6, 2>(Imm);
- }];
-}
-
-// A 8-bit unsigned immediate where the least significant three bits are zero.
-def uimm8_lsb000 : RISCVOp,
- ImmLeaf<XLenVT, [{return isShiftedUInt<5, 3>(Imm);}]> {
- let ParserMatchClass = UImmAsmOperand<8, "Lsb000">;
- let EncoderMethod = "getImmOpValue";
- let DecoderMethod = "decodeUImmOperand<8>";
- let OperandType = "OPERAND_UIMM8_LSB000";
- let MCOperandPredicate = [{
- int64_t Imm;
- if (!MCOp.evaluateAsConstantImm(Imm))
- return false;
- return isShiftedUInt<5, 3>(Imm);
- }];
-}
-
-// A 9-bit signed immediate where the least significant bit is zero.
-def simm9_lsb0 : Operand<OtherVT>,
- ImmLeaf<XLenVT, [{return isShiftedInt<8, 1>(Imm);}]> {
- let ParserMatchClass = SImmAsmOperand<9, "Lsb0">;
- let PrintMethod = "printBranchOperand";
- let EncoderMethod = "getImmOpValueAsr1";
- let DecoderMethod = "decodeSImmOperandAndLsl1<9>";
- let MCOperandPredicate = [{
- int64_t Imm;
- if (MCOp.evaluateAsConstantImm(Imm))
- return isShiftedInt<8, 1>(Imm);
- return MCOp.isBareSymbolRef();
- }];
- let OperandType = "OPERAND_PCREL";
-}
-
-// A 9-bit unsigned immediate where the least significant three bits are zero.
-def uimm9_lsb000 : RISCVOp,
- ImmLeaf<XLenVT, [{return isShiftedUInt<6, 3>(Imm);}]> {
- let ParserMatchClass = UImmAsmOperand<9, "Lsb000">;
- let EncoderMethod = "getImmOpValue";
- let DecoderMethod = "decodeUImmOperand<9>";
- let OperandType = "OPERAND_UIMM9_LSB000";
- let MCOperandPredicate = [{
- int64_t Imm;
- if (!MCOp.evaluateAsConstantImm(Imm))
- return false;
- return isShiftedUInt<6, 3>(Imm);
- }];
-}
-
-// A 10-bit unsigned immediate where the least significant two bits are zero
-// and the immediate can't be zero.
-def uimm10_lsb00nonzero : RISCVOp,
- ImmLeaf<XLenVT,
- [{return isShiftedUInt<8, 2>(Imm) && (Imm != 0);}]> {
- let ParserMatchClass = UImmAsmOperand<10, "Lsb00NonZero">;
- let EncoderMethod = "getImmOpValue";
- let DecoderMethod = "decodeUImmNonZeroOperand<10>";
- let OperandType = "OPERAND_UIMM10_LSB00_NONZERO";
- let MCOperandPredicate = [{
- int64_t Imm;
- if (!MCOp.evaluateAsConstantImm(Imm))
- return false;
- return isShiftedUInt<8, 2>(Imm) && (Imm != 0);
- }];
-}
-
-// A 10-bit signed immediate where the least significant four bits are zero.
-def simm10_lsb0000nonzero : RISCVOp,
- ImmLeaf<XLenVT,
- [{return (Imm != 0) && isShiftedInt<6, 4>(Imm);}]> {
- let ParserMatchClass = SImmAsmOperand<10, "Lsb0000NonZero">;
- let EncoderMethod = "getImmOpValue";
- let DecoderMethod = "decodeSImmNonZeroOperand<10>";
- let OperandType = "OPERAND_SIMM10_LSB0000_NONZERO";
- let MCOperandPredicate = [{
- int64_t Imm;
- if (!MCOp.evaluateAsConstantImm(Imm))
- return false;
- return isShiftedInt<6, 4>(Imm) && (Imm != 0);
- }];
-}
-
-// A 12-bit signed immediate where the least significant bit is zero.
-def simm12_lsb0 : Operand<XLenVT>,
- ImmLeaf<XLenVT, [{return isShiftedInt<11, 1>(Imm);}]> {
- let ParserMatchClass = SImmAsmOperand<12, "Lsb0">;
- let PrintMethod = "printBranchOperand";
- let EncoderMethod = "getImmOpValueAsr1";
- let DecoderMethod = "decodeSImmOperandAndLsl1<12>";
- let MCOperandPredicate = [{
- int64_t Imm;
- if (MCOp.evaluateAsConstantImm(Imm))
- return isShiftedInt<11, 1>(Imm);
- return MCOp.isBareSymbolRef();
- }];
- let OperandType = "OPERAND_PCREL";
-}
-
def InsnCDirectiveOpcode : AsmOperandClass {
let Name = "InsnCDirectiveOpcode";
let ParserMethod = "parseInsnCDirectiveOpcode";
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXMips.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXMips.td
new file mode 100644
index 00000000000000..3bd3279f498283
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXMips.td
@@ -0,0 +1,82 @@
+//===-- RISCVInstrInfoXMips.td -----------------------------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the vendor extensions defined by MIPS.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MIPS extensions
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasVendorMIPSCMove], hasSideEffects = 0, mayLoad = 0, mayStore = 0, DecoderNamespace = "Xmipscmove" in {
+def CCMOV : RVInstR4<0b11, 0b011, OPC_CUSTOM_0, (outs GPR:$rd),
+ (ins GPR:$rs1, GPR:$rs2, GPR:$rs3),
+ "ccmov", "$rd, $rs2, $rs1, $rs3">,
+ Sched<[]>;
+}
+
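+// CCMOV selects between $rs1 and $rs3 based on $rs2: rd = (rs2 != 0) ? rs1 : rs3
+// (printed as "ccmov $rd, $rs2, $rs1, $rs3"). The patterns below first reduce a
+// comparison to a zero/non-zero value so it can serve as the condition operand.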
+let Predicates = [HasVendorMIPSCMove] in {
+def : Pat<(select (XLenVT (setne (XLenVT GPR:$rs2), (XLenVT 0))),
+ (XLenVT GPR:$rs1), (XLenVT GPR:$rs3)),
+ (CCMOV GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
+def : Pat<(select (XLenVT (seteq (XLenVT GPR:$rs2), (XLenVT 0))),
+ (XLenVT GPR:$rs3), (XLenVT GPR:$rs1)),
+ (CCMOV GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
+def : Pat<(select (XLenVT (setne (XLenVT GPR:$x), (XLenVT simm12_plus1:$y))),
+ (XLenVT GPR:$rs1), (XLenVT GPR:$rs3)),
+ (CCMOV GPR:$rs1, (ADDI GPR:$x, (NegImm simm12_plus1:$y)), GPR:$rs3)>;
+def : Pat<(select (XLenVT (seteq (XLenVT GPR:$x), (XLenVT simm12_plus1:$y))),
+ (XLenVT GPR:$rs3), (XLenVT GPR:$rs1)),
+ (CCMOV GPR:$rs1, (ADDI GPR:$x, (NegImm simm12_plus1:$y)), GPR:$rs3)>;
+def : Pat<(select (XLenVT (setne (XLenVT GPR:$x), (XLenVT GPR:$y))),
+ (XLenVT GPR:$rs1), (XLenVT GPR:$rs3)),
+ (CCMOV GPR:$rs1, (XOR GPR:$x, GPR:$y), GPR:$rs3)>;
+def : Pat<(select (XLenVT (seteq (XLenVT GPR:$x), (XLenVT GPR:$y))),
+ (XLenVT GPR:$rs3), (XLenVT GPR:$rs1)),
+ (CCMOV GPR:$rs1, (XOR GPR:$x, GPR:$y), GPR:$rs3)>;
+def : Pat<(select (XLenVT (setuge (XLenVT GPR:$x), (XLenVT GPR:$y))),
+ (XLenVT GPR:$rs3), (XLenVT GPR:$rs1)),
+ (CCMOV GPR:$rs1, (SLTU GPR:$x, GPR:$y), GPR:$rs3)>;
+def : Pat<(select (XLenVT (setge (XLenVT GPR:$x), (XLenVT GPR:$y))),
+ (XLenVT GPR:$rs3), (XLenVT GPR:$rs1)),
+ (CCMOV GPR:$rs1, (SLT GPR:$x, GPR:$y), GPR:$rs3)>;
+def : Pat<(select (XLenVT (setle (XLenVT GPR:$y), (XLenVT GPR:$x))),
+ (XLenVT GPR:$rs3), (XLenVT GPR:$rs1)),
+ (CCMOV GPR:$rs1, (SLT GPR:$x, GPR:$y), GPR:$rs3)>;
+def : Pat<(select (XLenVT GPR:$rs2), (XLenVT GPR:$rs1), (XLenVT GPR:$rs3)),
+ (CCMOV GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
+}
+
+let Predicates = [HasVendorMIPSLoadStorePairs], hasSideEffects = 0, DecoderNamespace = "Xmipslsp" in {
+def LWP : LWPFormat<(outs GPR:$rd1, GPR:$rd2), (ins GPR:$rs1, uimm7_lsb00:$imm7),
+ "lwp", "$rd1, $rd2, ${imm7}(${rs1})">,
+ Sched<[WriteLDW, WriteLDW, ReadMemBase]> {
+let mayLoad = 1;
+let mayStore = 0;
+}
+def LDP : LDPFormat<(outs GPR:$rd1, GPR:$rd2), (ins GPR:$rs1, uimm7_lsb000:$imm7),
+ "ldp", "$rd1, $rd2, ${imm7}(${rs1})">,
+ Sched<[WriteLDD, WriteLDD, ReadMemBase]> {
+let mayLoad = 1;
+let mayStore = 0;
+}
+def SWP : SWPFormat<(outs), (ins GPR:$rs2, GPR:$rs3, GPR:$rs1, uimm7_lsb00:$imm7),
+ "swp", "$rs2, $rs3, ${imm7}(${rs1})">,
+ Sched<[WriteSTW, ReadStoreData, ReadStoreData, ReadMemBase]> {
+let mayLoad = 0;
+let mayStore = 1;
+}
+def SDP : SDPFormat<(outs), (ins GPR:$rs2, GPR:$rs3, GPR:$rs1, uimm7_lsb000:$imm7),
+ "sdp", "$rs2, $rs3, ${imm7}(${rs1})">,
+ Sched<[WriteSTD, ReadStoreData, ReadStoreData, ReadMemBase]> {
+let mayLoad = 0;
+let mayStore = 1;
+}
+}
diff --git a/llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp
new file mode 100644
index 00000000000000..b2575d54fc4a49
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp
@@ -0,0 +1,370 @@
+//===----- RISCVLoadStoreOptimizer.cpp ------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Bundle loads and stores that operate on consecutive memory locations to take
+// advantage of hardware load/store bonding.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RISCV.h"
+#include "RISCVTargetMachine.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetOptions.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "riscv-load-store-opt"
+#define RISCV_LOAD_STORE_OPT_NAME "RISCV Load / Store Optimizer"
+namespace {
+
+struct RISCVLoadStoreOpt : public MachineFunctionPass {
+ static char ID;
+ bool runOnMachineFunction(MachineFunction &Fn) override;
+
+ RISCVLoadStoreOpt() : MachineFunctionPass(ID) {}
+
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoVRegs);
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AAResultsWrapperPass>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ StringRef getPassName() const override { return RISCV_LOAD_STORE_OPT_NAME; }
+
+ // Find and pair load/store instructions.
+ bool tryToPairLdStInst(MachineBasicBlock::iterator &MBBI);
+
+ // Convert load/store pairs to single instructions.
+ bool tryConvertToLdStPair(MachineBasicBlock::iterator First,
+ MachineBasicBlock::iterator Second);
+
+ // Scan the instructions looking for a load/store that can be combined
+ // with the current instruction into a load/store pair.
+ // Return the matching instruction if one is found, else MBB->end().
+ MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I,
+ bool &MergeForward);
+
+ MachineBasicBlock::iterator
+ mergePairedInsns(MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator Paired, bool MergeForward);
+
+private:
+ AliasAnalysis *AA;
+ MachineRegisterInfo *MRI;
+ const RISCVInstrInfo *TII;
+ const RISCVRegisterInfo *TRI;
+ LiveRegUnits ModifiedRegUnits, UsedRegUnits;
+ bool UseLoadStorePair = false;
+};
+} // end anonymous namespace
+
+char RISCVLoadStoreOpt::ID = 0;
+INITIALIZE_PASS(RISCVLoadStoreOpt, DEBUG_TYPE, RISCV_LOAD_STORE_OPT_NAME, false,
+ false)
+
+bool RISCVLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
+ if (skipFunction(Fn.getFunction()))
+ return false;
+ const RISCVSubtarget &Subtarget = Fn.getSubtarget<RISCVSubtarget>();
+
+ if (!Subtarget.useLoadStorePairs())
+ return false;
+
+ bool MadeChange = false;
+ TII = Subtarget.getInstrInfo();
+ TRI = Subtarget.getRegisterInfo();
+ MRI = &Fn.getRegInfo();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+ ModifiedRegUnits.init(*TRI);
+ UsedRegUnits.init(*TRI);
+ UseLoadStorePair = Subtarget.useLoadStorePairs();
+
+ for (MachineBasicBlock &MBB : Fn) {
+ LLVM_DEBUG(dbgs() << "MBB: " << MBB.getName() << "\n");
+
+ for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ MBBI != E;) {
+ if (TII->isPairableLdStInstOpc(MBBI->getOpcode()) &&
+ tryToPairLdStInst(MBBI))
+ MadeChange = true;
+ else
+ ++MBBI;
+ }
+ }
+ return MadeChange;
+}
+
+// Find loads and stores that can be merged into a single load or store pair
+// instruction.
+bool RISCVLoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
+ MachineInstr &MI = *MBBI;
+ MachineBasicBlock::iterator E = MI.getParent()->end();
+
+ if (!TII->isLdStSafeToPair(MI, TRI))
+ return false;
+
+ // Look ahead for a pairable instruction.
+ bool MergeForward;
+ MachineBasicBlock::iterator Paired = findMatchingInsn(MBBI, MergeForward);
+ if (Paired != E) {
+ MBBI = mergePairedInsns(MBBI, Paired, MergeForward);
+ return true;
+ }
+ return false;
+}
+
+bool RISCVLoadStoreOpt::tryConvertToLdStPair(
+ MachineBasicBlock::iterator First, MachineBasicBlock::iterator Second) {
+ if (!UseLoadStorePair)
+ return false;
+
+ unsigned PairOpc;
+ // TODO: Handle the rest from RISCVInstrInfo::isPairableLdStInstOpc.
+ switch (First->getOpcode()) {
+ default:
+ return false;
+ case RISCV::SW:
+ PairOpc = RISCV::SWP;
+ break;
+ case RISCV::LW:
+ PairOpc = RISCV::LWP;
+ break;
+ case RISCV::SD:
+ PairOpc = RISCV::SDP;
+ break;
+ case RISCV::LD:
+ PairOpc = RISCV::LDP;
+ break;
+ }
+
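+ // Only bond accesses that are sufficiently aligned for the combined width and
+ // whose offset fits the unsigned 7-bit, access-size-aligned immediate field.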
+ MachineFunction *MF = First->getMF();
+ const MachineMemOperand *MMO = *First->memoperands_begin();
+ Align MMOAlign = MMO->getAlign();
+ if (const PseudoSourceValue *Source = MMO->getPseudoValue())
+ if (Source->kind() == PseudoSourceValue::FixedStack)
+ MMOAlign = MF->getSubtarget().getFrameLowering()->getStackAlign();
+
+ if (MMOAlign < Align(MMO->getSize().getValue() * 2))
+ return false;
+ int64_t Offset = First->getOperand(2).getImm();
+ if (!isUInt<7>(Offset) ||
+ !isAligned(Align(MMO->getSize().getValue()), Offset))
+ return false;
+ MachineInstrBuilder MIB = BuildMI(
+ *MF,
+ First->getDebugLoc().get() ? First->getDebugLoc() : Second->getDebugLoc(),
+ TII->get(PairOpc));
+ MIB.add(First->getOperand(0))
+ .add(Second->getOperand(0))
+ .add(First->getOperand(1))
+ .add(First->getOperand(2))
+ .cloneMergedMemRefs({&*First, &*Second});
+
+ First->getParent()->insert(First, MIB);
+
+ First->removeFromParent();
+ Second->removeFromParent();
+
+ return true;
+}
+
+static bool mayAlias(MachineInstr &MIa,
+ SmallVectorImpl<MachineInstr *> &MemInsns,
+ AliasAnalysis *AA) {
+ for (MachineInstr *MIb : MemInsns)
+ if (MIa.mayAlias(AA, *MIb, /*UseTBAA*/ false))
+ return true;
+
+ return false;
+}
+
+// Scan the instructions looking for a load/store that can be combined with the
+// current instruction into a wider equivalent or a load/store pair.
+// TODO: Extend pairing logic to consider reordering both instructions
+// to a safe "middle" position rather than only merging forward/backward.
+// This requires more sophisticated checks for aliasing, register
+// liveness, and potential scheduling hazards.
+MachineBasicBlock::iterator
+RISCVLoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
+ bool &MergeForward) {
+ MachineBasicBlock::iterator E = I->getParent()->end();
+ MachineBasicBlock::iterator MBBI = I;
+ MachineInstr &FirstMI = *I;
+ MBBI = next_nodbg(MBBI, E);
+
+ bool MayLoad = FirstMI.mayLoad();
+ Register Reg = FirstMI.getOperand(0).getReg();
+ Register BaseReg = FirstMI.getOperand(1).getReg();
+ int Offset = FirstMI.getOperand(2).getImm();
+ int OffsetStride = (*FirstMI.memoperands_begin())->getSize().getValue();
+
+ MergeForward = false;
+
+ // Track which register units have been modified and used between the first
+ // insn (inclusive) and the second insn.
+ ModifiedRegUnits.clear();
+ UsedRegUnits.clear();
+
+ // Remember any instructions that read/write memory between FirstMI and MI.
+ SmallVector<MachineInstr *, 4> MemInsns;
+
+ for (unsigned Count = 0; MBBI != E && Count < 128;
+ MBBI = next_nodbg(MBBI, E)) {
+ MachineInstr &MI = *MBBI;
+
+ // Don't count transient instructions towards the search limit since there
+ // may be different numbers of them if e.g. debug information is present.
+ if (!MI.isTransient())
+ ++Count;
+
+ if (MI.getOpcode() == FirstMI.getOpcode() &&
+ TII->isLdStSafeToPair(MI, TRI)) {
+ Register MIBaseReg = MI.getOperand(1).getReg();
+ int MIOffset = MI.getOperand(2).getImm();
+
+ if (BaseReg == MIBaseReg) {
+
+ if ((Offset != MIOffset + OffsetStride) &&
+ (Offset + OffsetStride != MIOffset)) {
+ LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
+ TRI);
+ MemInsns.push_back(&MI);
+ continue;
+ }
+
+ // If the destination register of one load is the same register or a
+ // sub/super register of the other load, bail and keep looking.
+ if (MayLoad &&
+ TRI->isSuperOrSubRegisterEq(Reg, MI.getOperand(0).getReg())) {
+ LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
+ TRI);
+ MemInsns.push_back(&MI);
+ continue;
+ }
+
+ // If the BaseReg has been modified, then we cannot do the optimization.
+ if (!ModifiedRegUnits.available(BaseReg))
+ return E;
+
+ // If the Rt of the second instruction was not modified or used between
+ // the two instructions and none of the instructions between the second
+ // and first alias with the second, we can combine the second into the
+ // first.
+ if (ModifiedRegUnits.available(MI.getOperand(0).getReg()) &&
+ !(MI.mayLoad() &&
+ !UsedRegUnits.available(MI.getOperand(0).getReg())) &&
+ !mayAlias(MI, MemInsns, AA)) {
+
+ MergeForward = false;
+ return MBBI;
+ }
+
+ // Likewise, if the Rt of the first instruction is not modified or used
+ // between the two instructions and none of the instructions between the
+ // first and the second alias with the first, we can combine the first
+ // into the second.
+ if (!(MayLoad &&
+ !UsedRegUnits.available(FirstMI.getOperand(0).getReg())) &&
+ !mayAlias(FirstMI, MemInsns, AA)) {
+
+ if (ModifiedRegUnits.available(FirstMI.getOperand(0).getReg())) {
+ MergeForward = true;
+ return MBBI;
+ }
+ }
+ // Unable to combine these instructions due to interference in between.
+ // Keep looking.
+ }
+ }
+
+ // The instruction wasn't a matching load or store. Stop searching if we
+ // encounter a call instruction that might modify memory.
+ if (MI.isCall())
+ return E;
+
+ // Update the modified / used register units.
+ LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
+
+ // Otherwise, if the base register is modified, we have no match, so
+ // return early.
+ if (!ModifiedRegUnits.available(BaseReg))
+ return E;
+
+ // Update list of instructions that read/write memory.
+ if (MI.mayLoadOrStore())
+ MemInsns.push_back(&MI);
+ }
+ return E;
+}
+
+MachineBasicBlock::iterator
+RISCVLoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator Paired,
+ bool MergeForward) {
+ MachineBasicBlock::iterator E = I->getParent()->end();
+ MachineBasicBlock::iterator NextI = next_nodbg(I, E);
+ if (NextI == Paired)
+ NextI = next_nodbg(NextI, E);
+
+ // Insert our new paired instruction after whichever of the paired
+ // instructions MergeForward indicates.
+ MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I;
+ MachineBasicBlock::iterator DeletionPoint = MergeForward ? I : Paired;
+ int Offset = I->getOperand(2).getImm();
+ int PairedOffset = Paired->getOperand(2).getImm();
+ bool InsertAfter = (Offset < PairedOffset) ^ MergeForward;
+
+ if (!MergeForward)
+ Paired->getOperand(1).setIsKill(false);
+
+ // Kill flags may become invalid when moving stores for pairing.
+ if (I->getOperand(0).isUse()) {
+ if (!MergeForward) {
+ // Clear kill flags on store if moving upwards.
+ I->getOperand(0).setIsKill(false);
+ Paired->getOperand(0).setIsKill(false);
+ } else {
+ // Clear kill flags of the first store's register.
+ Register Reg = I->getOperand(0).getReg();
+ for (MachineInstr &MI : make_range(std::next(I), std::next(Paired)))
+ MI.clearRegisterKills(Reg, TRI);
+ }
+ }
+
+ MachineInstr *ToInsert = DeletionPoint->removeFromParent();
+ MachineBasicBlock &MBB = *InsertionPoint->getParent();
+ MachineBasicBlock::iterator First, Second;
+
+ if (!InsertAfter) {
+ First = MBB.insert(InsertionPoint, ToInsert);
+ Second = InsertionPoint;
+ } else {
+ Second = MBB.insertAfter(InsertionPoint, ToInsert);
+ First = InsertionPoint;
+ }
+
+ if (!tryConvertToLdStPair(First, Second))
+ finalizeBundle(MBB, First.getInstrIterator(),
+ std::next(Second).getInstrIterator());
+
+ LLVM_DEBUG(dbgs() << "Bonding pair load/store:\n ");
+ LLVM_DEBUG(prev_nodbg(NextI, MBB.begin())->print(dbgs()));
+ return NextI;
+}
+
+// Returns an instance of the Load / Store Optimization pass.
+FunctionPass *llvm::createRISCVLoadStoreOptPass() {
+ return new RISCVLoadStoreOpt();
+}
diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td
index 61c7c21367036f..110c7a65ae047e 100644
--- a/llvm/lib/Target/RISCV/RISCVProcessors.td
+++ b/llvm/lib/Target/RISCV/RISCVProcessors.td
@@ -116,7 +116,9 @@ def MIPS_P8700 : RISCVProcessorModel<"mips-p8700",
FeatureStdExtZba,
FeatureStdExtZbb,
FeatureStdExtZifencei,
- FeatureStdExtZicsr],
+ FeatureStdExtZicsr,
+ FeatureVendorMIPSCMove,
+ FeatureVendorMIPSLoadStorePairs],
[TuneMIPSP8700]>;
def ROCKET_RV32 : RISCVProcessorModel<"rocket-rv32",
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
index 6e212dc58e6ddd..e6307086d93a31 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
@@ -62,6 +62,16 @@ static cl::opt<unsigned> RISCVMinimumJumpTableEntries(
"riscv-min-jump-table-entries", cl::Hidden,
cl::desc("Set minimum number of entries to use a jump table on RISCV"));
+static cl::opt<bool> UseLoadStorePairsOpt(
+ "riscv-load-store-pairs",
+ cl::desc("RISCV: Optimize for load-store bonding"),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<bool> UseCCMovInsn(
+ "riscv-ccmov",
+ cl::desc("RISCV: Use 'ccmov' instruction"),
+ cl::init(true), cl::Hidden);
+
void RISCVSubtarget::anchor() {}
RISCVSubtarget &
@@ -238,3 +248,11 @@ void RISCVSubtarget::overridePostRASchedPolicy(MachineSchedPolicy &Policy,
Policy.OnlyBottomUp = false;
}
}
+
+bool RISCVSubtarget::useLoadStorePairs() const {
+ return UseLoadStorePairsOpt && HasMIPSLSP;
+}
+
+bool RISCVSubtarget::useCCMovInsn() const {
+ return UseCCMovInsn && HasVendorMIPSCMove;
+}
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
index 87d508c3941737..8bec6edb324b14 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -188,6 +188,8 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
unsigned getXLen() const {
return is64Bit() ? 64 : 32;
}
+ bool useLoadStorePairs() const;
+ bool useCCMovInsn() const;
unsigned getFLen() const {
if (HasStdExtD)
return 64;
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index f6ccbfbe217df6..4c8cd97c698ffd 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -143,6 +143,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
initializeRISCVDAGToDAGISelLegacyPass(*PR);
initializeRISCVMoveMergePass(*PR);
initializeRISCVPushPopOptPass(*PR);
+ initializeRISCVLoadStoreOptPass(*PR);
}
static StringRef computeDataLayout(const Triple &TT,
@@ -389,6 +390,13 @@ class RISCVPassConfig : public TargetPassConfig {
DAG->addMutation(createStoreClusterDAGMutation(
DAG->TII, DAG->TRI, /*ReorderWhileClustering=*/true));
}
+
+ const RISCVSubtarget &ST = C->MF->getSubtarget<RISCVSubtarget>();
+ if (!ST.getMacroFusions().empty() && ST.useLoadStorePairs()) {
+ DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
+ DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
+ }
+
return DAG;
}
@@ -548,6 +556,8 @@ void RISCVPassConfig::addPreSched2() {
// Emit KCFI checks for indirect calls.
addPass(createKCFIPass());
+ if (TM->getOptLevel() != CodeGenOptLevel::None)
+ addPass(createRISCVLoadStoreOptPass());
}
void RISCVPassConfig::addPreEmitPass() {
@@ -561,6 +571,11 @@ void RISCVPassConfig::addPreEmitPass() {
addPass(createMachineCopyPropagationPass(true));
addPass(&BranchRelaxationPassID);
addPass(createRISCVMakeCompressibleOptPass());
+
+ // LoadStoreOptimizer creates bundles for load-store bonding.
+ addPass(createUnpackMachineBundles([](const MachineFunction &MF) {
+ return MF.getSubtarget<RISCVSubtarget>().useLoadStorePairs();
+ }));
}
void RISCVPassConfig::addPreEmitPass2() {
diff --git a/llvm/test/CodeGen/RISCV/O0-pipeline.ll b/llvm/test/CodeGen/RISCV/O0-pipeline.ll
index f60def9d546f81..5ee6c192b80291 100644
--- a/llvm/test/CodeGen/RISCV/O0-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O0-pipeline.ll
@@ -63,6 +63,7 @@
; CHECK-NEXT: Implement the 'patchable-function' attribute
; CHECK-NEXT: Branch relaxation pass
; CHECK-NEXT: RISC-V Make Compressible
+; CHECK-NEXT: Unpack machine instruction bundles
; CHECK-NEXT: Contiguously Lay Out Funclets
; CHECK-NEXT: Remove Loads Into Fake Uses
; CHECK-NEXT: StackMap Liveness Analysis
diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
index b0c756e26985bb..473c41109671b6 100644
--- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
@@ -177,6 +177,7 @@
; CHECK-NEXT: Post-RA pseudo instruction expansion pass
; CHECK-NEXT: RISC-V post-regalloc pseudo instruction expansion pass
; CHECK-NEXT: Insert KCFI indirect call checks
+; CHECK-NEXT: RISCV Load / Store Optimizer
; CHECK-NEXT: MachineDominator Tree Construction
; CHECK-NEXT: Machine Natural Loop Construction
; CHECK-NEXT: PostRA Machine Instruction Scheduler
@@ -190,6 +191,7 @@
; CHECK-NEXT: Machine Copy Propagation Pass
; CHECK-NEXT: Branch relaxation pass
; CHECK-NEXT: RISC-V Make Compressible
+; CHECK-NEXT: Unpack machine instruction bundles
; CHECK-NEXT: Contiguously Lay Out Funclets
; CHECK-NEXT: Remove Loads Into Fake Uses
; CHECK-NEXT: StackMap Liveness Analysis
diff --git a/llvm/test/CodeGen/RISCV/load-store-pair.ll b/llvm/test/CodeGen/RISCV/load-store-pair.ll
new file mode 100644
index 00000000000000..76649b831f266a
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/load-store-pair.ll
@@ -0,0 +1,509 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV32I
+; RUN: llc -mtriple=riscv32 -target-abi ilp32d -mattr=+d -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV32D
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV64I
+; RUN: llc -mtriple=riscv64 -target-abi lp64d -mattr=+d -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV64D
+; RUN: llc -mtriple=riscv32 -mattr=+Xmipslsp -riscv-load-store-pairs=1 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV32I_PAIR
+; RUN: llc -mtriple=riscv32 -target-abi ilp32d -mattr=+d,+Xmipslsp -riscv-load-store-pairs=1 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV32D_PAIR
+; RUN: llc -mtriple=riscv64 -mattr=+Xmipslsp -riscv-load-store-pairs=1 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV64I_PAIR
+; RUN: llc -mtriple=riscv64 -mcpu mips-p8700 -mattr=+Xmipslsp -riscv-load-store-pairs=1 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV64P_8700
+; RUN: llc -mtriple=riscv64 -target-abi lp64d -mattr=+d,+Xmipslsp -riscv-load-store-pairs=1 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV64D_PAIR
+; RUN: llc -mtriple=riscv64 -target-abi lp64d -mattr=+d -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV64D_8700
+
+define dso_local void @testi(i8** nocapture noundef readonly %a) local_unnamed_addr #0 {
+; RV32I-LABEL: testi:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: .cfi_def_cfa_offset 16
+; RV32I-NEXT: sw s2, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s4, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s5, 0(sp) # 4-byte Folded Spill
+; RV32I-NEXT: .cfi_offset s2, -4
+; RV32I-NEXT: .cfi_offset s3, -8
+; RV32I-NEXT: .cfi_offset s4, -12
+; RV32I-NEXT: .cfi_offset s5, -16
+; RV32I-NEXT: lw s3, 0(a0)
+; RV32I-NEXT: lw s2, 4(a0)
+; RV32I-NEXT: lw s5, 8(a0)
+; RV32I-NEXT: lw s4, 12(a0)
+; RV32I-NEXT: #APP
+; RV32I-NEXT: #NO_APP
+; RV32I-NEXT: lw s2, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s4, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s5, 0(sp) # 4-byte Folded Reload
+; RV32I-NEXT: .cfi_restore s2
+; RV32I-NEXT: .cfi_restore s3
+; RV32I-NEXT: .cfi_restore s4
+; RV32I-NEXT: .cfi_restore s5
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: .cfi_def_cfa_offset 0
+; RV32I-NEXT: ret
+;
+; RV32D-LABEL: testi:
+; RV32D: # %bb.0: # %entry
+; RV32D-NEXT: addi sp, sp, -16
+; RV32D-NEXT: .cfi_def_cfa_offset 16
+; RV32D-NEXT: sw s2, 12(sp) # 4-byte Folded Spill
+; RV32D-NEXT: sw s3, 8(sp) # 4-byte Folded Spill
+; RV32D-NEXT: sw s4, 4(sp) # 4-byte Folded Spill
+; RV32D-NEXT: sw s5, 0(sp) # 4-byte Folded Spill
+; RV32D-NEXT: .cfi_offset s2, -4
+; RV32D-NEXT: .cfi_offset s3, -8
+; RV32D-NEXT: .cfi_offset s4, -12
+; RV32D-NEXT: .cfi_offset s5, -16
+; RV32D-NEXT: lw s3, 0(a0)
+; RV32D-NEXT: lw s2, 4(a0)
+; RV32D-NEXT: lw s5, 8(a0)
+; RV32D-NEXT: lw s4, 12(a0)
+; RV32D-NEXT: #APP
+; RV32D-NEXT: #NO_APP
+; RV32D-NEXT: lw s2, 12(sp) # 4-byte Folded Reload
+; RV32D-NEXT: lw s3, 8(sp) # 4-byte Folded Reload
+; RV32D-NEXT: lw s4, 4(sp) # 4-byte Folded Reload
+; RV32D-NEXT: lw s5, 0(sp) # 4-byte Folded Reload
+; RV32D-NEXT: .cfi_restore s2
+; RV32D-NEXT: .cfi_restore s3
+; RV32D-NEXT: .cfi_restore s4
+; RV32D-NEXT: .cfi_restore s5
+; RV32D-NEXT: addi sp, sp, 16
+; RV32D-NEXT: .cfi_def_cfa_offset 0
+; RV32D-NEXT: ret
+;
+; RV64I-LABEL: testi:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: .cfi_def_cfa_offset 32
+; RV64I-NEXT: sd s2, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s4, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s5, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT: .cfi_offset s2, -8
+; RV64I-NEXT: .cfi_offset s3, -16
+; RV64I-NEXT: .cfi_offset s4, -24
+; RV64I-NEXT: .cfi_offset s5, -32
+; RV64I-NEXT: ld s3, 0(a0)
+; RV64I-NEXT: ld s2, 8(a0)
+; RV64I-NEXT: ld s5, 16(a0)
+; RV64I-NEXT: ld s4, 24(a0)
+; RV64I-NEXT: #APP
+; RV64I-NEXT: #NO_APP
+; RV64I-NEXT: ld s2, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s4, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s5, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT: .cfi_restore s2
+; RV64I-NEXT: .cfi_restore s3
+; RV64I-NEXT: .cfi_restore s4
+; RV64I-NEXT: .cfi_restore s5
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: .cfi_def_cfa_offset 0
+; RV64I-NEXT: ret
+;
+; RV64D-LABEL: testi:
+; RV64D: # %bb.0: # %entry
+; RV64D-NEXT: addi sp, sp, -32
+; RV64D-NEXT: .cfi_def_cfa_offset 32
+; RV64D-NEXT: sd s2, 24(sp) # 8-byte Folded Spill
+; RV64D-NEXT: sd s3, 16(sp) # 8-byte Folded Spill
+; RV64D-NEXT: sd s4, 8(sp) # 8-byte Folded Spill
+; RV64D-NEXT: sd s5, 0(sp) # 8-byte Folded Spill
+; RV64D-NEXT: .cfi_offset s2, -8
+; RV64D-NEXT: .cfi_offset s3, -16
+; RV64D-NEXT: .cfi_offset s4, -24
+; RV64D-NEXT: .cfi_offset s5, -32
+; RV64D-NEXT: ld s3, 0(a0)
+; RV64D-NEXT: ld s2, 8(a0)
+; RV64D-NEXT: ld s5, 16(a0)
+; RV64D-NEXT: ld s4, 24(a0)
+; RV64D-NEXT: #APP
+; RV64D-NEXT: #NO_APP
+; RV64D-NEXT: ld s2, 24(sp) # 8-byte Folded Reload
+; RV64D-NEXT: ld s3, 16(sp) # 8-byte Folded Reload
+; RV64D-NEXT: ld s4, 8(sp) # 8-byte Folded Reload
+; RV64D-NEXT: ld s5, 0(sp) # 8-byte Folded Reload
+; RV64D-NEXT: .cfi_restore s2
+; RV64D-NEXT: .cfi_restore s3
+; RV64D-NEXT: .cfi_restore s4
+; RV64D-NEXT: .cfi_restore s5
+; RV64D-NEXT: addi sp, sp, 32
+; RV64D-NEXT: .cfi_def_cfa_offset 0
+; RV64D-NEXT: ret
+;
+; RV32I_PAIR-LABEL: testi:
+; RV32I_PAIR: # %bb.0: # %entry
+; RV32I_PAIR-NEXT: addi sp, sp, -16
+; RV32I_PAIR-NEXT: .cfi_def_cfa_offset 16
+; RV32I_PAIR-NEXT: swp s3, s2, 8(sp) # 8-byte Folded Spill
+; RV32I_PAIR-NEXT: swp s5, s4, 0(sp) # 8-byte Folded Spill
+; RV32I_PAIR-NEXT: .cfi_offset s2, -4
+; RV32I_PAIR-NEXT: .cfi_offset s3, -8
+; RV32I_PAIR-NEXT: .cfi_offset s4, -12
+; RV32I_PAIR-NEXT: .cfi_offset s5, -16
+; RV32I_PAIR-NEXT: lwp s3, s2, 0(a0)
+; RV32I_PAIR-NEXT: lwp s5, s4, 8(a0)
+; RV32I_PAIR-NEXT: #APP
+; RV32I_PAIR-NEXT: #NO_APP
+; RV32I_PAIR-NEXT: lwp s3, s2, 8(sp) # 8-byte Folded Reload
+; RV32I_PAIR-NEXT: lwp s5, s4, 0(sp) # 8-byte Folded Reload
+; RV32I_PAIR-NEXT: .cfi_restore s2
+; RV32I_PAIR-NEXT: .cfi_restore s3
+; RV32I_PAIR-NEXT: .cfi_restore s4
+; RV32I_PAIR-NEXT: .cfi_restore s5
+; RV32I_PAIR-NEXT: addi sp, sp, 16
+; RV32I_PAIR-NEXT: .cfi_def_cfa_offset 0
+; RV32I_PAIR-NEXT: ret
+;
+; RV32D_PAIR-LABEL: testi:
+; RV32D_PAIR: # %bb.0: # %entry
+; RV32D_PAIR-NEXT: addi sp, sp, -16
+; RV32D_PAIR-NEXT: .cfi_def_cfa_offset 16
+; RV32D_PAIR-NEXT: swp s3, s2, 8(sp) # 8-byte Folded Spill
+; RV32D_PAIR-NEXT: swp s5, s4, 0(sp) # 8-byte Folded Spill
+; RV32D_PAIR-NEXT: .cfi_offset s2, -4
+; RV32D_PAIR-NEXT: .cfi_offset s3, -8
+; RV32D_PAIR-NEXT: .cfi_offset s4, -12
+; RV32D_PAIR-NEXT: .cfi_offset s5, -16
+; RV32D_PAIR-NEXT: lwp s3, s2, 0(a0)
+; RV32D_PAIR-NEXT: lwp s5, s4, 8(a0)
+; RV32D_PAIR-NEXT: #APP
+; RV32D_PAIR-NEXT: #NO_APP
+; RV32D_PAIR-NEXT: lwp s3, s2, 8(sp) # 8-byte Folded Reload
+; RV32D_PAIR-NEXT: lwp s5, s4, 0(sp) # 8-byte Folded Reload
+; RV32D_PAIR-NEXT: .cfi_restore s2
+; RV32D_PAIR-NEXT: .cfi_restore s3
+; RV32D_PAIR-NEXT: .cfi_restore s4
+; RV32D_PAIR-NEXT: .cfi_restore s5
+; RV32D_PAIR-NEXT: addi sp, sp, 16
+; RV32D_PAIR-NEXT: .cfi_def_cfa_offset 0
+; RV32D_PAIR-NEXT: ret
+;
+; RV64I_PAIR-LABEL: testi:
+; RV64I_PAIR: # %bb.0: # %entry
+; RV64I_PAIR-NEXT: addi sp, sp, -32
+; RV64I_PAIR-NEXT: .cfi_def_cfa_offset 32
+; RV64I_PAIR-NEXT: sdp s3, s2, 16(sp) # 16-byte Folded Spill
+; RV64I_PAIR-NEXT: sdp s5, s4, 0(sp) # 16-byte Folded Spill
+; RV64I_PAIR-NEXT: .cfi_offset s2, -8
+; RV64I_PAIR-NEXT: .cfi_offset s3, -16
+; RV64I_PAIR-NEXT: .cfi_offset s4, -24
+; RV64I_PAIR-NEXT: .cfi_offset s5, -32
+; RV64I_PAIR-NEXT: ld s3, 0(a0)
+; RV64I_PAIR-NEXT: ld s2, 8(a0)
+; RV64I_PAIR-NEXT: ld s5, 16(a0)
+; RV64I_PAIR-NEXT: ld s4, 24(a0)
+; RV64I_PAIR-NEXT: #APP
+; RV64I_PAIR-NEXT: #NO_APP
+; RV64I_PAIR-NEXT: ldp s3, s2, 16(sp) # 16-byte Folded Reload
+; RV64I_PAIR-NEXT: ldp s5, s4, 0(sp) # 16-byte Folded Reload
+; RV64I_PAIR-NEXT: .cfi_restore s2
+; RV64I_PAIR-NEXT: .cfi_restore s3
+; RV64I_PAIR-NEXT: .cfi_restore s4
+; RV64I_PAIR-NEXT: .cfi_restore s5
+; RV64I_PAIR-NEXT: addi sp, sp, 32
+; RV64I_PAIR-NEXT: .cfi_def_cfa_offset 0
+; RV64I_PAIR-NEXT: ret
+;
+; RV64P_8700-LABEL: testi:
+; RV64P_8700: # %bb.0: # %entry
+; RV64P_8700-NEXT: addi sp, sp, -32
+; RV64P_8700-NEXT: .cfi_def_cfa_offset 32
+; RV64P_8700-NEXT: sdp s3, s2, 16(sp) # 16-byte Folded Spill
+; RV64P_8700-NEXT: sdp s5, s4, 0(sp) # 16-byte Folded Spill
+; RV64P_8700-NEXT: .cfi_offset s2, -8
+; RV64P_8700-NEXT: .cfi_offset s3, -16
+; RV64P_8700-NEXT: .cfi_offset s4, -24
+; RV64P_8700-NEXT: .cfi_offset s5, -32
+; RV64P_8700-NEXT: ld s3, 0(a0)
+; RV64P_8700-NEXT: ld s2, 8(a0)
+; RV64P_8700-NEXT: ld s5, 16(a0)
+; RV64P_8700-NEXT: ld s4, 24(a0)
+; RV64P_8700-NEXT: #APP
+; RV64P_8700-NEXT: #NO_APP
+; RV64P_8700-NEXT: ldp s3, s2, 16(sp) # 16-byte Folded Reload
+; RV64P_8700-NEXT: ldp s5, s4, 0(sp) # 16-byte Folded Reload
+; RV64P_8700-NEXT: .cfi_restore s2
+; RV64P_8700-NEXT: .cfi_restore s3
+; RV64P_8700-NEXT: .cfi_restore s4
+; RV64P_8700-NEXT: .cfi_restore s5
+; RV64P_8700-NEXT: addi sp, sp, 32
+; RV64P_8700-NEXT: .cfi_def_cfa_offset 0
+; RV64P_8700-NEXT: ret
+;
+; RV64D_PAIR-LABEL: testi:
+; RV64D_PAIR: # %bb.0: # %entry
+; RV64D_PAIR-NEXT: addi sp, sp, -32
+; RV64D_PAIR-NEXT: .cfi_def_cfa_offset 32
+; RV64D_PAIR-NEXT: sdp s3, s2, 16(sp) # 16-byte Folded Spill
+; RV64D_PAIR-NEXT: sdp s5, s4, 0(sp) # 16-byte Folded Spill
+; RV64D_PAIR-NEXT: .cfi_offset s2, -8
+; RV64D_PAIR-NEXT: .cfi_offset s3, -16
+; RV64D_PAIR-NEXT: .cfi_offset s4, -24
+; RV64D_PAIR-NEXT: .cfi_offset s5, -32
+; RV64D_PAIR-NEXT: ld s3, 0(a0)
+; RV64D_PAIR-NEXT: ld s2, 8(a0)
+; RV64D_PAIR-NEXT: ld s5, 16(a0)
+; RV64D_PAIR-NEXT: ld s4, 24(a0)
+; RV64D_PAIR-NEXT: #APP
+; RV64D_PAIR-NEXT: #NO_APP
+; RV64D_PAIR-NEXT: ldp s3, s2, 16(sp) # 16-byte Folded Reload
+; RV64D_PAIR-NEXT: ldp s5, s4, 0(sp) # 16-byte Folded Reload
+; RV64D_PAIR-NEXT: .cfi_restore s2
+; RV64D_PAIR-NEXT: .cfi_restore s3
+; RV64D_PAIR-NEXT: .cfi_restore s4
+; RV64D_PAIR-NEXT: .cfi_restore s5
+; RV64D_PAIR-NEXT: addi sp, sp, 32
+; RV64D_PAIR-NEXT: .cfi_def_cfa_offset 0
+; RV64D_PAIR-NEXT: ret
+;
+; RV64D_8700-LABEL: testi:
+; RV64D_8700: # %bb.0: # %entry
+; RV64D_8700-NEXT: addi sp, sp, -32
+; RV64D_8700-NEXT: .cfi_def_cfa_offset 32
+; RV64D_8700-NEXT: sd s2, 24(sp) # 8-byte Folded Spill
+; RV64D_8700-NEXT: sd s3, 16(sp) # 8-byte Folded Spill
+; RV64D_8700-NEXT: sd s4, 8(sp) # 8-byte Folded Spill
+; RV64D_8700-NEXT: sd s5, 0(sp) # 8-byte Folded Spill
+; RV64D_8700-NEXT: .cfi_offset s2, -8
+; RV64D_8700-NEXT: .cfi_offset s3, -16
+; RV64D_8700-NEXT: .cfi_offset s4, -24
+; RV64D_8700-NEXT: .cfi_offset s5, -32
+; RV64D_8700-NEXT: ld s3, 0(a0)
+; RV64D_8700-NEXT: ld s2, 8(a0)
+; RV64D_8700-NEXT: ld s5, 16(a0)
+; RV64D_8700-NEXT: ld s4, 24(a0)
+; RV64D_8700-NEXT: #APP
+; RV64D_8700-NEXT: #NO_APP
+; RV64D_8700-NEXT: ld s2, 24(sp) # 8-byte Folded Reload
+; RV64D_8700-NEXT: ld s3, 16(sp) # 8-byte Folded Reload
+; RV64D_8700-NEXT: ld s4, 8(sp) # 8-byte Folded Reload
+; RV64D_8700-NEXT: ld s5, 0(sp) # 8-byte Folded Reload
+; RV64D_8700-NEXT: .cfi_restore s2
+; RV64D_8700-NEXT: .cfi_restore s3
+; RV64D_8700-NEXT: .cfi_restore s4
+; RV64D_8700-NEXT: .cfi_restore s5
+; RV64D_8700-NEXT: addi sp, sp, 32
+; RV64D_8700-NEXT: .cfi_def_cfa_offset 0
+; RV64D_8700-NEXT: ret
+entry:
+ %arrayidx = getelementptr inbounds i8*, i8** %a, i64 1
+ %0 = load i8*, i8** %arrayidx, align 8
+ %1 = load i8*, i8** %a, align 8
+ %arrayidx2 = getelementptr inbounds i8*, i8** %a, i64 3
+ %2 = load i8*, i8** %arrayidx2, align 8
+ %arrayidx3 = getelementptr inbounds i8*, i8** %a, i64 2
+ %3 = load i8*, i8** %arrayidx3, align 8
+ tail call void asm sideeffect "", "{x18},{x19},{x20},{x21}"(i8* %0, i8* %1, i8* %2, i8* %3)
+ ret void
+}
+
+
+define dso_local void @testf(float* nocapture noundef readonly %a) local_unnamed_addr #0 {
+; RV32I-LABEL: testf:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lw a3, 0(a0)
+; RV32I-NEXT: lw a4, 4(a0)
+; RV32I-NEXT: lw a2, 8(a0)
+; RV32I-NEXT: lw a1, 12(a0)
+; RV32I-NEXT: mv a0, a4
+; RV32I-NEXT: tail sinkf
+;
+; RV32D-LABEL: testf:
+; RV32D: # %bb.0: # %entry
+; RV32D-NEXT: flw fa3, 0(a0)
+; RV32D-NEXT: flw fa0, 4(a0)
+; RV32D-NEXT: flw fa2, 8(a0)
+; RV32D-NEXT: flw fa1, 12(a0)
+; RV32D-NEXT: tail sinkf
+;
+; RV64I-LABEL: testf:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lw a3, 0(a0)
+; RV64I-NEXT: lw a4, 4(a0)
+; RV64I-NEXT: lw a2, 8(a0)
+; RV64I-NEXT: lw a1, 12(a0)
+; RV64I-NEXT: mv a0, a4
+; RV64I-NEXT: tail sinkf
+;
+; RV64D-LABEL: testf:
+; RV64D: # %bb.0: # %entry
+; RV64D-NEXT: flw fa3, 0(a0)
+; RV64D-NEXT: flw fa0, 4(a0)
+; RV64D-NEXT: flw fa2, 8(a0)
+; RV64D-NEXT: flw fa1, 12(a0)
+; RV64D-NEXT: tail sinkf
+;
+; RV32I_PAIR-LABEL: testf:
+; RV32I_PAIR: # %bb.0: # %entry
+; RV32I_PAIR-NEXT: lw a3, 0(a0)
+; RV32I_PAIR-NEXT: lw a4, 4(a0)
+; RV32I_PAIR-NEXT: lw a2, 8(a0)
+; RV32I_PAIR-NEXT: lw a1, 12(a0)
+; RV32I_PAIR-NEXT: mv a0, a4
+; RV32I_PAIR-NEXT: tail sinkf
+;
+; RV32D_PAIR-LABEL: testf:
+; RV32D_PAIR: # %bb.0: # %entry
+; RV32D_PAIR-NEXT: flw fa3, 0(a0)
+; RV32D_PAIR-NEXT: flw fa0, 4(a0)
+; RV32D_PAIR-NEXT: flw fa2, 8(a0)
+; RV32D_PAIR-NEXT: flw fa1, 12(a0)
+; RV32D_PAIR-NEXT: tail sinkf
+;
+; RV64I_PAIR-LABEL: testf:
+; RV64I_PAIR: # %bb.0: # %entry
+; RV64I_PAIR-NEXT: lw a3, 0(a0)
+; RV64I_PAIR-NEXT: lw a4, 4(a0)
+; RV64I_PAIR-NEXT: lw a2, 8(a0)
+; RV64I_PAIR-NEXT: lw a1, 12(a0)
+; RV64I_PAIR-NEXT: mv a0, a4
+; RV64I_PAIR-NEXT: tail sinkf
+;
+; RV64P_8700-LABEL: testf:
+; RV64P_8700: # %bb.0: # %entry
+; RV64P_8700-NEXT: flw fa3, 0(a0)
+; RV64P_8700-NEXT: flw fa0, 4(a0)
+; RV64P_8700-NEXT: flw fa2, 8(a0)
+; RV64P_8700-NEXT: flw fa1, 12(a0)
+; RV64P_8700-NEXT: tail sinkf
+;
+; RV64D_PAIR-LABEL: testf:
+; RV64D_PAIR: # %bb.0: # %entry
+; RV64D_PAIR-NEXT: flw fa3, 0(a0)
+; RV64D_PAIR-NEXT: flw fa0, 4(a0)
+; RV64D_PAIR-NEXT: flw fa2, 8(a0)
+; RV64D_PAIR-NEXT: flw fa1, 12(a0)
+; RV64D_PAIR-NEXT: tail sinkf
+;
+; RV64D_8700-LABEL: testf:
+; RV64D_8700: # %bb.0: # %entry
+; RV64D_8700-NEXT: flw fa3, 0(a0)
+; RV64D_8700-NEXT: flw fa0, 4(a0)
+; RV64D_8700-NEXT: flw fa2, 8(a0)
+; RV64D_8700-NEXT: flw fa1, 12(a0)
+; RV64D_8700-NEXT: tail sinkf
+entry:
+ %arrayidx = getelementptr inbounds float, float* %a, i64 1
+ %0 = load float, float* %arrayidx, align 4
+ %arrayidx1 = getelementptr inbounds float, float* %a, i64 3
+ %1 = load float, float* %arrayidx1, align 4
+ %arrayidx2 = getelementptr inbounds float, float* %a, i64 2
+ %2 = load float, float* %arrayidx2, align 4
+ %3 = load float, float* %a, align 4
+ tail call void @sinkf(float noundef %0, float noundef %1, float noundef %2, float noundef %3)
+ ret void
+}
+
+declare dso_local void @sinkf(float noundef, float noundef, float noundef, float noundef) local_unnamed_addr
+
+define dso_local void @testd(double* nocapture noundef readonly %a) local_unnamed_addr #0 {
+; RV32I-LABEL: testd:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lw a4, 16(a0)
+; RV32I-NEXT: lw a5, 20(a0)
+; RV32I-NEXT: lw a2, 24(a0)
+; RV32I-NEXT: lw a3, 28(a0)
+; RV32I-NEXT: lw a6, 0(a0)
+; RV32I-NEXT: lw a7, 4(a0)
+; RV32I-NEXT: lw t0, 8(a0)
+; RV32I-NEXT: lw a1, 12(a0)
+; RV32I-NEXT: mv a0, t0
+; RV32I-NEXT: tail sinkd
+;
+; RV32D-LABEL: testd:
+; RV32D: # %bb.0: # %entry
+; RV32D-NEXT: fld fa3, 0(a0)
+; RV32D-NEXT: fld fa0, 8(a0)
+; RV32D-NEXT: fld fa2, 16(a0)
+; RV32D-NEXT: fld fa1, 24(a0)
+; RV32D-NEXT: tail sinkd
+;
+; RV64I-LABEL: testd:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: ld a3, 0(a0)
+; RV64I-NEXT: ld a4, 8(a0)
+; RV64I-NEXT: ld a2, 16(a0)
+; RV64I-NEXT: ld a1, 24(a0)
+; RV64I-NEXT: mv a0, a4
+; RV64I-NEXT: tail sinkd
+;
+; RV64D-LABEL: testd:
+; RV64D: # %bb.0: # %entry
+; RV64D-NEXT: fld fa3, 0(a0)
+; RV64D-NEXT: fld fa0, 8(a0)
+; RV64D-NEXT: fld fa2, 16(a0)
+; RV64D-NEXT: fld fa1, 24(a0)
+; RV64D-NEXT: tail sinkd
+;
+; RV32I_PAIR-LABEL: testd:
+; RV32I_PAIR: # %bb.0: # %entry
+; RV32I_PAIR-NEXT: lwp a4, a5, 16(a0)
+; RV32I_PAIR-NEXT: lwp a2, a3, 24(a0)
+; RV32I_PAIR-NEXT: lwp a6, a7, 0(a0)
+; RV32I_PAIR-NEXT: lwp a0, a1, 8(a0)
+; RV32I_PAIR-NEXT: tail sinkd
+;
+; RV32D_PAIR-LABEL: testd:
+; RV32D_PAIR: # %bb.0: # %entry
+; RV32D_PAIR-NEXT: fld fa3, 0(a0)
+; RV32D_PAIR-NEXT: fld fa0, 8(a0)
+; RV32D_PAIR-NEXT: fld fa2, 16(a0)
+; RV32D_PAIR-NEXT: fld fa1, 24(a0)
+; RV32D_PAIR-NEXT: tail sinkd
+;
+; RV64I_PAIR-LABEL: testd:
+; RV64I_PAIR: # %bb.0: # %entry
+; RV64I_PAIR-NEXT: ld a3, 0(a0)
+; RV64I_PAIR-NEXT: ld a4, 8(a0)
+; RV64I_PAIR-NEXT: ld a2, 16(a0)
+; RV64I_PAIR-NEXT: ld a1, 24(a0)
+; RV64I_PAIR-NEXT: mv a0, a4
+; RV64I_PAIR-NEXT: tail sinkd
+;
+; RV64P_8700-LABEL: testd:
+; RV64P_8700: # %bb.0: # %entry
+; RV64P_8700-NEXT: fld fa3, 0(a0)
+; RV64P_8700-NEXT: fld fa0, 8(a0)
+; RV64P_8700-NEXT: fld fa2, 16(a0)
+; RV64P_8700-NEXT: fld fa1, 24(a0)
+; RV64P_8700-NEXT: tail sinkd
+;
+; RV64D_PAIR-LABEL: testd:
+; RV64D_PAIR: # %bb.0: # %entry
+; RV64D_PAIR-NEXT: fld fa3, 0(a0)
+; RV64D_PAIR-NEXT: fld fa0, 8(a0)
+; RV64D_PAIR-NEXT: fld fa2, 16(a0)
+; RV64D_PAIR-NEXT: fld fa1, 24(a0)
+; RV64D_PAIR-NEXT: tail sinkd
+;
+; RV64D_8700-LABEL: testd:
+; RV64D_8700: # %bb.0: # %entry
+; RV64D_8700-NEXT: fld fa3, 0(a0)
+; RV64D_8700-NEXT: fld fa0, 8(a0)
+; RV64D_8700-NEXT: fld fa2, 16(a0)
+; RV64D_8700-NEXT: fld fa1, 24(a0)
+; RV64D_8700-NEXT: tail sinkd
+entry:
+ %arrayidx = getelementptr inbounds double, double* %a, i64 1
+ %0 = load double, double* %arrayidx, align 8
+ %arrayidx1 = getelementptr inbounds double, double* %a, i64 3
+ %1 = load double, double* %arrayidx1, align 8
+ %arrayidx2 = getelementptr inbounds double, double* %a, i64 2
+ %2 = load double, double* %arrayidx2, align 8
+ %3 = load double, double* %a, align 8
+ tail call void @sinkd(double noundef %0, double noundef %1, double noundef %2, double noundef %3)
+ ret void
+}
+
+declare dso_local void @sinkd(double noundef, double noundef, double noundef, double noundef) local_unnamed_addr
diff --git a/llvm/test/CodeGen/RISCV/select-and.ll b/llvm/test/CodeGen/RISCV/select-and.ll
index d305993f0e966b..56f6285ab05577 100644
--- a/llvm/test/CodeGen/RISCV/select-and.ll
+++ b/llvm/test/CodeGen/RISCV/select-and.ll
@@ -3,6 +3,8 @@
; RUN: | FileCheck -check-prefix=RV32I %s
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefix=RV64I %s
+; RUN: llc -mtriple=riscv64 -mattr=+xmipscmove -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=RV64I-CCMOV %s
;; There are a few different ways to lower (select (and A, B), X, Y). This test
;; ensures that we do so with as few branches as possible.
@@ -27,6 +29,12 @@ define signext i32 @select_of_and(i1 zeroext %a, i1 zeroext %b, i32 signext %c,
; RV64I-NEXT: mv a0, a3
; RV64I-NEXT: .LBB0_2:
; RV64I-NEXT: ret
+;
+; RV64I-CCMOV-LABEL: select_of_and:
+; RV64I-CCMOV: # %bb.0:
+; RV64I-CCMOV-NEXT: and a0, a0, a1
+; RV64I-CCMOV-NEXT: ccmov a0, a0, a2, a3
+; RV64I-CCMOV-NEXT: ret
%1 = and i1 %a, %b
%2 = select i1 %1, i32 %c, i32 %d
ret i32 %2
@@ -69,6 +77,23 @@ define signext i32 @if_of_and(i1 zeroext %a, i1 zeroext %b) nounwind {
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
+;
+; RV64I-CCMOV-LABEL: if_of_and:
+; RV64I-CCMOV: # %bb.0:
+; RV64I-CCMOV-NEXT: addi sp, sp, -16
+; RV64I-CCMOV-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-CCMOV-NEXT: beqz a0, .LBB1_3
+; RV64I-CCMOV-NEXT: # %bb.1:
+; RV64I-CCMOV-NEXT: beqz a1, .LBB1_3
+; RV64I-CCMOV-NEXT: # %bb.2: # %if.then
+; RV64I-CCMOV-NEXT: call both
+; RV64I-CCMOV-NEXT: j .LBB1_4
+; RV64I-CCMOV-NEXT: .LBB1_3: # %if.else
+; RV64I-CCMOV-NEXT: call neither
+; RV64I-CCMOV-NEXT: .LBB1_4: # %if.end
+; RV64I-CCMOV-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-CCMOV-NEXT: addi sp, sp, 16
+; RV64I-CCMOV-NEXT: ret
%1 = and i1 %a, %b
br i1 %1, label %if.then, label %if.else
diff --git a/llvm/test/CodeGen/RISCV/select-bare.ll b/llvm/test/CodeGen/RISCV/select-bare.ll
index cf8fe96742bfbd..4b49a626e28bee 100644
--- a/llvm/test/CodeGen/RISCV/select-bare.ll
+++ b/llvm/test/CodeGen/RISCV/select-bare.ll
@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
; RUN: | FileCheck %s -check-prefix=RV32I
+; RUN: llc -mtriple=riscv64 -mattr=+xmipscmove -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=RV64I-CCMOV %s
define i32 @bare_select(i1 %a, i32 %b, i32 %c) nounwind {
; RV32I-LABEL: bare_select:
@@ -12,6 +14,12 @@ define i32 @bare_select(i1 %a, i32 %b, i32 %c) nounwind {
; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: .LBB0_2:
; RV32I-NEXT: ret
+;
+; RV64I-CCMOV-LABEL: bare_select:
+; RV64I-CCMOV: # %bb.0:
+; RV64I-CCMOV-NEXT: andi a0, a0, 1
+; RV64I-CCMOV-NEXT: ccmov a0, a0, a1, a2
+; RV64I-CCMOV-NEXT: ret
%1 = select i1 %a, i32 %b, i32 %c
ret i32 %1
}
@@ -26,6 +34,12 @@ define float @bare_select_float(i1 %a, float %b, float %c) nounwind {
; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: .LBB1_2:
; RV32I-NEXT: ret
+;
+; RV64I-CCMOV-LABEL: bare_select_float:
+; RV64I-CCMOV: # %bb.0:
+; RV64I-CCMOV-NEXT: andi a0, a0, 1
+; RV64I-CCMOV-NEXT: ccmov a0, a0, a1, a2
+; RV64I-CCMOV-NEXT: ret
%1 = select i1 %a, float %b, float %c
ret float %1
}
diff --git a/llvm/test/CodeGen/RISCV/select-cc.ll b/llvm/test/CodeGen/RISCV/select-cc.ll
index 31e25702da8ba2..17afbd724e3525 100644
--- a/llvm/test/CodeGen/RISCV/select-cc.ll
+++ b/llvm/test/CodeGen/RISCV/select-cc.ll
@@ -3,6 +3,8 @@
; RUN: | FileCheck -check-prefixes=RV32I %s
; RUN: llc -mtriple=riscv64 -disable-block-placement -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefixes=RV64I %s
+; RUN: llc -mtriple=riscv64 -mattr=+xmipscmove -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=RV64I-CCMOV %s
define signext i32 @foo(i32 signext %a, ptr %b) nounwind {
; RV32I-LABEL: foo:
@@ -156,6 +158,57 @@ define signext i32 @foo(i32 signext %a, ptr %b) nounwind {
; RV64I-NEXT: mv a0, a1
; RV64I-NEXT: .LBB0_28:
; RV64I-NEXT: ret
+;
+; RV64I-CCMOV-LABEL: foo:
+; RV64I-CCMOV: # %bb.0:
+; RV64I-CCMOV-NEXT: lw a2, 0(a1)
+; RV64I-CCMOV-NEXT: lw a3, 0(a1)
+; RV64I-CCMOV-NEXT: lw a4, 0(a1)
+; RV64I-CCMOV-NEXT: lw a5, 0(a1)
+; RV64I-CCMOV-NEXT: xor a6, a0, a2
+; RV64I-CCMOV-NEXT: ccmov a0, a6, a2, a0
+; RV64I-CCMOV-NEXT: xor a2, a0, a3
+; RV64I-CCMOV-NEXT: ccmov a0, a2, a0, a3
+; RV64I-CCMOV-NEXT: lw a2, 0(a1)
+; RV64I-CCMOV-NEXT: sltu a3, a4, a0
+; RV64I-CCMOV-NEXT: ccmov a0, a3, a0, a4
+; RV64I-CCMOV-NEXT: lw a3, 0(a1)
+; RV64I-CCMOV-NEXT: sltu a4, a0, a5
+; RV64I-CCMOV-NEXT: ccmov a0, a4, a5, a0
+; RV64I-CCMOV-NEXT: lw a4, 0(a1)
+; RV64I-CCMOV-NEXT: sltu a5, a0, a2
+; RV64I-CCMOV-NEXT: ccmov a0, a5, a0, a2
+; RV64I-CCMOV-NEXT: lw a2, 0(a1)
+; RV64I-CCMOV-NEXT: sltu a5, a3, a0
+; RV64I-CCMOV-NEXT: ccmov a0, a5, a3, a0
+; RV64I-CCMOV-NEXT: lw a3, 0(a1)
+; RV64I-CCMOV-NEXT: sext.w a5, a0
+; RV64I-CCMOV-NEXT: slt a5, a4, a5
+; RV64I-CCMOV-NEXT: ccmov a0, a5, a0, a4
+; RV64I-CCMOV-NEXT: lw a4, 0(a1)
+; RV64I-CCMOV-NEXT: sext.w a5, a0
+; RV64I-CCMOV-NEXT: slt a5, a5, a2
+; RV64I-CCMOV-NEXT: ccmov a0, a5, a2, a0
+; RV64I-CCMOV-NEXT: lw a2, 0(a1)
+; RV64I-CCMOV-NEXT: sext.w a5, a0
+; RV64I-CCMOV-NEXT: slt a5, a5, a3
+; RV64I-CCMOV-NEXT: ccmov a0, a5, a0, a3
+; RV64I-CCMOV-NEXT: lw a3, 0(a1)
+; RV64I-CCMOV-NEXT: sext.w a5, a0
+; RV64I-CCMOV-NEXT: slt a5, a4, a5
+; RV64I-CCMOV-NEXT: ccmov a0, a5, a4, a0
+; RV64I-CCMOV-NEXT: lw a4, 0(a1)
+; RV64I-CCMOV-NEXT: slti a5, a2, 1
+; RV64I-CCMOV-NEXT: ccmov a0, a5, a0, a2
+; RV64I-CCMOV-NEXT: slti a5, a2, 0
+; RV64I-CCMOV-NEXT: ccmov a0, a5, a3, a0
+; RV64I-CCMOV-NEXT: lw a1, 0(a1)
+; RV64I-CCMOV-NEXT: slti a3, a4, 1025
+; RV64I-CCMOV-NEXT: ccmov a0, a3, a4, a0
+; RV64I-CCMOV-NEXT: sltiu a2, a2, 2047
+; RV64I-CCMOV-NEXT: ccmov a0, a2, a1, a0
+; RV64I-CCMOV-NEXT: sext.w a0, a0
+; RV64I-CCMOV-NEXT: ret
%val1 = load volatile i32, ptr %b
%tst1 = icmp eq i32 %a, %val1
%val2 = select i1 %tst1, i32 %a, i32 %val1
@@ -258,6 +311,23 @@ define signext i16 @numsignbits(i16 signext %0, i16 signext %1, i16 signext %2,
; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
+;
+; RV64I-CCMOV-LABEL: numsignbits:
+; RV64I-CCMOV: # %bb.0:
+; RV64I-CCMOV-NEXT: addi sp, sp, -16
+; RV64I-CCMOV-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-CCMOV-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
+; RV64I-CCMOV-NEXT: ccmov s0, a0, a2, a3
+; RV64I-CCMOV-NEXT: beqz a1, .LBB1_2
+; RV64I-CCMOV-NEXT: # %bb.1:
+; RV64I-CCMOV-NEXT: mv a0, s0
+; RV64I-CCMOV-NEXT: call bar
+; RV64I-CCMOV-NEXT: .LBB1_2:
+; RV64I-CCMOV-NEXT: mv a0, s0
+; RV64I-CCMOV-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-CCMOV-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
+; RV64I-CCMOV-NEXT: addi sp, sp, 16
+; RV64I-CCMOV-NEXT: ret
%5 = icmp eq i16 %0, 0
%6 = select i1 %5, i16 %3, i16 %2
%7 = icmp eq i16 %1, 0
@@ -295,6 +365,14 @@ define i32 @select_sge_int16min(i32 signext %x, i32 signext %y, i32 signext %z)
; RV64I-NEXT: .LBB2_2:
; RV64I-NEXT: mv a0, a1
; RV64I-NEXT: ret
+;
+; RV64I-CCMOV-LABEL: select_sge_int16min:
+; RV64I-CCMOV: # %bb.0:
+; RV64I-CCMOV-NEXT: lui a3, 1048560
+; RV64I-CCMOV-NEXT: addiw a3, a3, -1
+; RV64I-CCMOV-NEXT: slt a0, a3, a0
+; RV64I-CCMOV-NEXT: ccmov a0, a0, a1, a2
+; RV64I-CCMOV-NEXT: ret
%a = icmp sge i32 %x, -65536
%b = select i1 %a, i32 %y, i32 %z
ret i32 %b
@@ -331,6 +409,14 @@ define i64 @select_sge_int32min(i64 %x, i64 %y, i64 %z) {
; RV64I-NEXT: .LBB3_2:
; RV64I-NEXT: mv a0, a1
; RV64I-NEXT: ret
+;
+; RV64I-CCMOV-LABEL: select_sge_int32min:
+; RV64I-CCMOV: # %bb.0:
+; RV64I-CCMOV-NEXT: lui a3, 524288
+; RV64I-CCMOV-NEXT: addi a3, a3, -1
+; RV64I-CCMOV-NEXT: slt a0, a3, a0
+; RV64I-CCMOV-NEXT: ccmov a0, a0, a1, a2
+; RV64I-CCMOV-NEXT: ret
%a = icmp sge i64 %x, -2147483648
%b = select i1 %a, i64 %y, i64 %z
ret i64 %b
diff --git a/llvm/test/CodeGen/RISCV/select-or.ll b/llvm/test/CodeGen/RISCV/select-or.ll
index 20a5ec15290cdb..b3e68a9282533e 100644
--- a/llvm/test/CodeGen/RISCV/select-or.ll
+++ b/llvm/test/CodeGen/RISCV/select-or.ll
@@ -3,6 +3,8 @@
; RUN: | FileCheck -check-prefix=RV32I %s
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefix=RV64I %s
+; RUN: llc -mtriple=riscv64 -mattr=+xmipscmove -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=RV64I-CCMOV %s
;; There are a few different ways to lower (select (or A, B), X, Y). This test
;; ensures that we do so with as few branches as possible.
@@ -27,6 +29,12 @@ define signext i32 @select_of_or(i1 zeroext %a, i1 zeroext %b, i32 signext %c, i
; RV64I-NEXT: mv a0, a3
; RV64I-NEXT: .LBB0_2:
; RV64I-NEXT: ret
+;
+; RV64I-CCMOV-LABEL: select_of_or:
+; RV64I-CCMOV: # %bb.0:
+; RV64I-CCMOV-NEXT: or a0, a0, a1
+; RV64I-CCMOV-NEXT: ccmov a0, a0, a2, a3
+; RV64I-CCMOV-NEXT: ret
%1 = or i1 %a, %b
%2 = select i1 %1, i32 %c, i32 %d
ret i32 %2
@@ -69,6 +77,23 @@ define signext i32 @if_of_or(i1 zeroext %a, i1 zeroext %b) nounwind {
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
+;
+; RV64I-CCMOV-LABEL: if_of_or:
+; RV64I-CCMOV: # %bb.0:
+; RV64I-CCMOV-NEXT: addi sp, sp, -16
+; RV64I-CCMOV-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-CCMOV-NEXT: bnez a0, .LBB1_3
+; RV64I-CCMOV-NEXT: # %bb.1:
+; RV64I-CCMOV-NEXT: bnez a1, .LBB1_3
+; RV64I-CCMOV-NEXT: # %bb.2: # %if.else
+; RV64I-CCMOV-NEXT: call neither
+; RV64I-CCMOV-NEXT: j .LBB1_4
+; RV64I-CCMOV-NEXT: .LBB1_3: # %if.then
+; RV64I-CCMOV-NEXT: call either
+; RV64I-CCMOV-NEXT: .LBB1_4: # %if.end
+; RV64I-CCMOV-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-CCMOV-NEXT: addi sp, sp, 16
+; RV64I-CCMOV-NEXT: ret
%1 = or i1 %a, %b
br i1 %1, label %if.then, label %if.else
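
Not part of the patch: a minimal sketch for readers who want to try the new conditional-move lowering locally. The file name and function below are illustrative only; the llc invocation mirrors the RUN lines added in these tests, and the ccmov operand order (destination, condition, value-if-nonzero, value-if-zero) is inferred from the select_of_and check lines above, not from a spec.

    ; ccmov-demo.ll -- illustrative repro, assuming the +xmipscmove feature name
    ; used by the RUN lines in this patch.
    ; Try: llc -mtriple=riscv64 -mattr=+xmipscmove -verify-machineinstrs ccmov-demo.ll -o -
    define signext i32 @demo(i1 zeroext %a, i1 zeroext %b, i32 signext %c, i32 signext %d) {
      %cond = and i1 %a, %b                   ; expected to materialize as: and a0, a0, a1
      %val = select i1 %cond, i32 %c, i32 %d  ; expected to lower to: ccmov a0, a0, a2, a3
      ret i32 %val
    }

With the feature disabled, the same IR should instead lower to the branch-based sequence shown in the unmodified RV64I checks.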