[llvm] 2b32e4f - [RISCV] Add basic support for the sifive-7-series short forward branch optimization.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 17 13:57:15 PDT 2022
Author: Craig Topper
Date: 2022-10-17T13:56:22-07:00
New Revision: 2b32e4f98b4f0e9e6a7c301b627ff75d9af8699e
URL: https://github.com/llvm/llvm-project/commit/2b32e4f98b4f0e9e6a7c301b627ff75d9af8699e
DIFF: https://github.com/llvm/llvm-project/commit/2b32e4f98b4f0e9e6a7c301b627ff75d9af8699e.diff
LOG: [RISCV] Add basic support for the sifive-7-series short forward branch optimization.
sifive-7-series has macrofusion support to convert a branch over
a single instruction into a conditional instruction. This can be
an improvement if the branch is hard to predict.
This patch adds support for the most basic case, a branch over a
move instruction. This is implemented as a pseudo instruction so
we can hide the control flow until all code motion passes complete.
I've disabled a recent select optimization when this feature is enabled
in the subtarget.
Related gcc patch for the same optimization https://www.mail-archive.com/gcc-patches@gcc.gnu.org/msg211045.html
Reviewed By: reames
Differential Revision: https://reviews.llvm.org/D135814
Added:
llvm/test/CodeGen/RISCV/short-foward-branch-opt.ll
Modified:
llvm/lib/Target/RISCV/RISCV.td
llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
llvm/lib/Target/RISCV/RISCVInstrInfo.td
llvm/lib/Target/RISCV/RISCVSchedRocket.td
llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
llvm/lib/Target/RISCV/RISCVSchedule.td
llvm/lib/Target/RISCV/RISCVSubtarget.h
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td
index 438c614d259ce..822b2c44c2309 100644
--- a/llvm/lib/Target/RISCV/RISCV.td
+++ b/llvm/lib/Target/RISCV/RISCV.td
@@ -449,9 +449,18 @@ def TuneNoDefaultUnroll
: SubtargetFeature<"no-default-unroll", "EnableDefaultUnroll", "false",
"Disable default unroll preference.">;
+// SiFive 7 is able to fuse integer ALU operations with a preceding branch
+// instruction.
+def TuneShortForwardBranchOpt
+ : SubtargetFeature<"short-forward-branch-opt", "HasShortForwardBranchOpt",
+ "true", "Enable short forward branch optimization">;
+def HasShortForwardBranchOpt : Predicate<"Subtarget->hasShortForwardBranchOpt()">;
+def NoShortForwardBranchOpt : Predicate<"!Subtarget->hasShortForwardBranchOpt()">;
+
def TuneSiFive7 : SubtargetFeature<"sifive7", "RISCVProcFamily", "SiFive7",
"SiFive 7-Series processors",
- [TuneNoDefaultUnroll]>;
+ [TuneNoDefaultUnroll,
+ TuneShortForwardBranchOpt]>;
// Assume that lock-free native-width atomics are available, even if the target
// and operating system combination would not usually provide them. The user
diff --git a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
index b5d5ffee7b857..83ec165474a46 100644
--- a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
@@ -45,6 +45,8 @@ class RISCVExpandPseudo : public MachineFunctionPass {
bool expandMBB(MachineBasicBlock &MBB);
bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
MachineBasicBlock::iterator &NextMBBI);
+ bool expandCCOp(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI);
bool expandVSetVL(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
bool expandVMSET_VMCLR(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, unsigned Opcode);
@@ -82,6 +84,8 @@ bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB,
// expanded instructions for each pseudo is correct in the Size field of the
// tablegen definition for the pseudo.
switch (MBBI->getOpcode()) {
+ case RISCV::PseudoCCMOVGPR:
+ return expandCCOp(MBB, MBBI, NextMBBI);
case RISCV::PseudoVSETVLI:
case RISCV::PseudoVSETVLIX0:
case RISCV::PseudoVSETIVLI:
@@ -133,6 +137,60 @@ bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB,
return false;
}
+bool RISCVExpandPseudo::expandCCOp(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI) {
+ assert(MBBI->getOpcode() == RISCV::PseudoCCMOVGPR && "Unexpected opcode");
+
+ MachineFunction *MF = MBB.getParent();
+ MachineInstr &MI = *MBBI;
+ DebugLoc DL = MI.getDebugLoc();
+
+ MachineBasicBlock *TrueBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+ MachineBasicBlock *MergeBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+
+ MF->insert(++MBB.getIterator(), TrueBB);
+ MF->insert(++TrueBB->getIterator(), MergeBB);
+
+ // We want to copy the "true" value when the condition is true which means
+ // we need to invert the branch condition to jump over TrueBB when the
+ // condition is false.
+ auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
+ CC = RISCVCC::getOppositeBranchCondition(CC);
+
+ // Insert branch instruction.
+ BuildMI(MBB, MBBI, DL, TII->getBrCond(CC))
+ .addReg(MI.getOperand(1).getReg())
+ .addReg(MI.getOperand(2).getReg())
+ .addMBB(MergeBB);
+
+ Register DestReg = MI.getOperand(0).getReg();
+ assert(MI.getOperand(4).getReg() == DestReg);
+
+ // Add MV.
+ BuildMI(TrueBB, DL, TII->get(RISCV::ADDI), DestReg)
+ .add(MI.getOperand(5))
+ .addImm(0);
+
+ TrueBB->addSuccessor(MergeBB);
+
+ MergeBB->splice(MergeBB->end(), &MBB, MI, MBB.end());
+ MergeBB->transferSuccessors(&MBB);
+
+ MBB.addSuccessor(TrueBB);
+ MBB.addSuccessor(MergeBB);
+
+ NextMBBI = MBB.end();
+ MI.eraseFromParent();
+
+ // Make sure live-ins are correctly attached to this new basic block.
+ LivePhysRegs LiveRegs;
+ computeAndAddLiveIns(LiveRegs, *TrueBB);
+ computeAndAddLiveIns(LiveRegs, *MergeBB);
+
+ return true;
+}
+
bool RISCVExpandPseudo::expandVSetVL(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI) {
assert(MBBI->getNumExplicitOperands() == 3 && MBBI->getNumOperands() >= 5 &&
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 874fe95c0a95b..0670b89539941 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -4222,28 +4222,30 @@ SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV);
}
- // (select c, -1, y) -> -c | y
- if (isAllOnesConstant(TrueV)) {
- SDValue Neg = DAG.getNegative(CondV, DL, VT);
- return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
- }
- // (select c, y, -1) -> (c-1) | y
- if (isAllOnesConstant(FalseV)) {
- SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
- DAG.getAllOnesConstant(DL, VT));
- return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
- }
+ if (!Subtarget.hasShortForwardBranchOpt()) {
+ // (select c, -1, y) -> -c | y
+ if (isAllOnesConstant(TrueV)) {
+ SDValue Neg = DAG.getNegative(CondV, DL, VT);
+ return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
+ }
+ // (select c, y, -1) -> (c-1) | y
+ if (isAllOnesConstant(FalseV)) {
+ SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
+ DAG.getAllOnesConstant(DL, VT));
+ return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
+ }
- // (select c, 0, y) -> (c-1) & y
- if (isNullConstant(TrueV)) {
- SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
- DAG.getAllOnesConstant(DL, VT));
- return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
- }
- // (select c, y, 0) -> -c & y
- if (isNullConstant(FalseV)) {
- SDValue Neg = DAG.getNegative(CondV, DL, VT);
- return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
+ // (select c, 0, y) -> (c-1) & y
+ if (isNullConstant(TrueV)) {
+ SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
+ DAG.getAllOnesConstant(DL, VT));
+ return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
+ }
+ // (select c, y, 0) -> -c & y
+ if (isNullConstant(FalseV)) {
+ SDValue Neg = DAG.getNegative(CondV, DL, VT);
+ return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
+ }
}
// If the CondV is the output of a SETCC node which operates on XLenVT inputs,
@@ -9450,9 +9452,11 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
// (select (x in [0,1] != 0), (z ^ y), y ) -> (-x & z ) ^ y
// (select (x in [0,1] == 0), y, (z | y) ) -> (-x & z ) | y
// (select (x in [0,1] != 0), (z | y), y ) -> (-x & z ) | y
+ // NOTE: We only do this if the target does not have the short forward
+ // branch optimization.
APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
- if (isNullConstant(RHS) && ISD::isIntEqualitySetCC(CCVal) &&
- DAG.MaskedValueIsZero(LHS, Mask)) {
+ if (!Subtarget.hasShortForwardBranchOpt() && isNullConstant(RHS) &&
+ ISD::isIntEqualitySetCC(CCVal) && DAG.MaskedValueIsZero(LHS, Mask)) {
unsigned Opcode;
SDValue Src1, Src2;
// true if FalseV is XOR or OR operator and one of its operands
@@ -9504,35 +9508,36 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
{LHS, RHS, CC, TrueV, FalseV});
- // (select c, -1, y) -> -c | y
- if (isAllOnesConstant(TrueV)) {
- SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
- SDValue Neg = DAG.getNegative(C, DL, VT);
- return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
- }
- // (select c, y, -1) -> -!c | y
- if (isAllOnesConstant(FalseV)) {
- SDValue C = DAG.getSetCC(DL, VT, LHS, RHS,
- ISD::getSetCCInverse(CCVal, VT));
- SDValue Neg = DAG.getNegative(C, DL, VT);
- return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
- }
+ if (!Subtarget.hasShortForwardBranchOpt()) {
+ // (select c, -1, y) -> -c | y
+ if (isAllOnesConstant(TrueV)) {
+ SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
+ SDValue Neg = DAG.getNegative(C, DL, VT);
+ return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
+ }
+ // (select c, y, -1) -> -!c | y
+ if (isAllOnesConstant(FalseV)) {
+ SDValue C =
+ DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
+ SDValue Neg = DAG.getNegative(C, DL, VT);
+ return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
+ }
- // (select c, 0, y) -> -!c & y
- if (isNullConstant(TrueV)) {
- SDValue C = DAG.getSetCC(DL, VT, LHS, RHS,
- ISD::getSetCCInverse(CCVal, VT));
- SDValue Neg = DAG.getNegative(C, DL, VT);
- return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
- }
- // (select c, y, 0) -> -c & y
- if (isNullConstant(FalseV)) {
- SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
- SDValue Neg = DAG.getNegative(C, DL, VT);
- return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
+ // (select c, 0, y) -> -!c & y
+ if (isNullConstant(TrueV)) {
+ SDValue C =
+ DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
+ SDValue Neg = DAG.getNegative(C, DL, VT);
+ return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
+ }
+ // (select c, y, 0) -> -c & y
+ if (isNullConstant(FalseV)) {
+ SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
+ SDValue Neg = DAG.getNegative(C, DL, VT);
+ return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
+ }
}
-
return SDValue();
}
case RISCVISD::BR_CC: {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 05b0c016c943c..7ae0ceee9aad0 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -1616,6 +1616,9 @@ bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI,
return false;
switch (MI.getOpcode()) {
+ case RISCV::PseudoCCMOVGPR:
+ // Operands 4 and 5 are commutable.
+ return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 4, 5);
case CASE_VFMA_SPLATS(FMADD):
case CASE_VFMA_SPLATS(FMSUB):
case CASE_VFMA_SPLATS(FMACC):
@@ -1761,6 +1764,15 @@ MachineInstr *RISCVInstrInfo::commuteInstructionImpl(MachineInstr &MI,
};
switch (MI.getOpcode()) {
+ case RISCV::PseudoCCMOVGPR: {
+ // CCMOV can be commuted by inverting the condition.
+ auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
+ CC = RISCVCC::getOppositeBranchCondition(CC);
+ auto &WorkingMI = cloneIfNew(MI);
+ WorkingMI.getOperand(3).setImm(CC);
+ return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI*/ false,
+ OpIdx1, OpIdx2);
+ }
case CASE_VFMA_SPLATS(FMACC):
case CASE_VFMA_SPLATS(FMADD):
case CASE_VFMA_SPLATS(FMSAC):
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index 4c8d54cc2c4ed..fb3bcf429d9a0 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -1313,6 +1313,20 @@ def riscv_selectcc_frag : PatFrag<(ops node:$lhs, node:$rhs, node:$cc,
node:$falsev), [{}],
IntCCtoRISCVCC>;
+let Predicates = [HasShortForwardBranchOpt],
+ Constraints = "$dst = $falsev", isCommutable = 1, Size = 8 in {
+// This instruction moves $truev to $dst when the condition is true. It will
+// be expanded to control flow in RISCVExpandPseudoInsts.
+def PseudoCCMOVGPR : Pseudo<(outs GPR:$dst),
+ (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+ GPR:$falsev, GPR:$truev),
+ [(set GPR:$dst,
+ (riscv_selectcc_frag:$cc GPR:$lhs, GPR:$rhs,
+ cond, GPR:$truev,
+ GPR:$falsev))]>,
+ Sched<[WriteSFB, ReadSFB, ReadSFB, ReadSFB, ReadSFB]>;
+}
+
multiclass SelectCC_GPR_rrirr<RegisterClass valty> {
let usesCustomInserter = 1 in
def _Using_CC_GPR : Pseudo<(outs valty:$dst),
@@ -1329,6 +1343,7 @@ multiclass SelectCC_GPR_rrirr<RegisterClass valty> {
(IntCCtoRISCVCC $cc), valty:$truev, valty:$falsev)>;
}
+let Predicates = [NoShortForwardBranchOpt] in
defm Select_GPR : SelectCC_GPR_rrirr<GPR>;
class SelectCompressOpt<CondCode Cond>: Pat<(riscv_selectcc_frag:$select GPR:$lhs, simm12_no6:$Constant, Cond,
diff --git a/llvm/lib/Target/RISCV/RISCVSchedRocket.td b/llvm/lib/Target/RISCV/RISCVSchedRocket.td
index 4bfd352edca3d..e39585ff08412 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedRocket.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedRocket.td
@@ -246,4 +246,5 @@ defm : UnsupportedSchedZbs;
defm : UnsupportedSchedZbkb;
defm : UnsupportedSchedZbkx;
defm : UnsupportedSchedZfh;
+defm : UnsupportedSchedSFB;
}
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
index 32e2b1f6d1e4e..17df9e212eb81 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
@@ -44,6 +44,12 @@ def : WriteRes<WriteJal, [SiFive7PipeB]>;
def : WriteRes<WriteJalr, [SiFive7PipeB]>;
def : WriteRes<WriteJmpReg, [SiFive7PipeB]>;
+//Short forward branch
+def : WriteRes<WriteSFB, [SiFive7PipeA, SiFive7PipeB]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+
// Integer arithmetic and logic
let Latency = 3 in {
def : WriteRes<WriteIALU, [SiFive7PipeAB]>;
@@ -223,6 +229,8 @@ def : ReadAdvance<ReadFMovI64ToF64, 0>;
def : ReadAdvance<ReadFClass32, 0>;
def : ReadAdvance<ReadFClass64, 0>;
+def : ReadAdvance<ReadSFB, 0>;
+
//===----------------------------------------------------------------------===//
// Unsupported extensions
defm : UnsupportedSchedV;
diff --git a/llvm/lib/Target/RISCV/RISCVSchedule.td b/llvm/lib/Target/RISCV/RISCVSchedule.td
index 8544e0012a767..0437f78c3dafd 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedule.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedule.td
@@ -105,6 +105,10 @@ def WriteFST16 : SchedWrite; // Floating point sp store
def WriteFST32 : SchedWrite; // Floating point sp store
def WriteFST64 : SchedWrite; // Floating point dp store
+// short forward branch for Bullet
+def WriteSFB : SchedWrite;
+def ReadSFB : SchedRead;
+
/// Define scheduler resources associated with use operands.
def ReadJmp : SchedRead;
def ReadJalr : SchedRead;
@@ -229,6 +233,14 @@ def : ReadAdvance<ReadFSqrt16, 0>;
} // Unsupported = true
}
+multiclass UnsupportedSchedSFB {
+let Unsupported = true in {
+def : WriteRes<WriteSFB, []>;
+
+def : ReadAdvance<ReadSFB, 0>;
+} // Unsupported = true
+}
+
// Include the scheduler resources for other instruction extensions.
include "RISCVScheduleZb.td"
include "RISCVScheduleV.td"
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
index 786194609e12c..89fb7cb212a87 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -96,6 +96,7 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
bool EnableDefaultUnroll = true;
bool EnableSaveRestore = false;
bool EnableUnalignedScalarMem = false;
+ bool HasShortForwardBranchOpt = false;
bool HasLUIADDIFusion = false;
bool HasForcedAtomics = false;
unsigned XLen = 32;
@@ -190,6 +191,7 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
bool enableRVCHintInstrs() const { return EnableRVCHintInstrs; }
bool enableDefaultUnroll() const { return EnableDefaultUnroll; }
bool enableSaveRestore() const { return EnableSaveRestore; }
+ bool hasShortForwardBranchOpt() const { return HasShortForwardBranchOpt; }
bool enableUnalignedScalarMem() const { return EnableUnalignedScalarMem; }
bool hasLUIADDIFusion() const { return HasLUIADDIFusion; }
bool hasForcedAtomics() const { return HasForcedAtomics; }
diff --git a/llvm/test/CodeGen/RISCV/short-foward-branch-opt.ll b/llvm/test/CodeGen/RISCV/short-foward-branch-opt.ll
new file mode 100644
index 0000000000000..ccb4c99837ec8
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/short-foward-branch-opt.ll
@@ -0,0 +1,385 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+c -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=NOSFB %s
+; RUN: llc -mtriple=riscv64 -mcpu=sifive-u74 -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=SFB %s
+
+; The sifive-7-series can predicate a mv.
+
+define signext i32 @test1(i32 signext %x, i32 signext %y, i32 signext %z) {
+; NOSFB-LABEL: test1:
+; NOSFB: # %bb.0:
+; NOSFB-NEXT: beqz a2, .LBB0_2
+; NOSFB-NEXT: # %bb.1:
+; NOSFB-NEXT: mv a0, a1
+; NOSFB-NEXT: .LBB0_2:
+; NOSFB-NEXT: ret
+;
+; SFB-LABEL: test1:
+; SFB: # %bb.0:
+; SFB-NEXT: beqz a2, .LBB0_2
+; SFB-NEXT: # %bb.1:
+; SFB-NEXT: mv a0, a1
+; SFB-NEXT: .LBB0_2:
+; SFB-NEXT: ret
+ %c = icmp eq i32 %z, 0
+ %b = select i1 %c, i32 %x, i32 %y
+ ret i32 %b
+}
+
+; Same as above with select operands swapped.
+define signext i32 @test2(i32 signext %x, i32 signext %y, i32 signext %z) {
+; NOSFB-LABEL: test2:
+; NOSFB: # %bb.0:
+; NOSFB-NEXT: beqz a2, .LBB1_2
+; NOSFB-NEXT: # %bb.1:
+; NOSFB-NEXT: mv a1, a0
+; NOSFB-NEXT: .LBB1_2:
+; NOSFB-NEXT: mv a0, a1
+; NOSFB-NEXT: ret
+;
+; SFB-LABEL: test2:
+; SFB: # %bb.0:
+; SFB-NEXT: bnez a2, .LBB1_2
+; SFB-NEXT: # %bb.1:
+; SFB-NEXT: mv a0, a1
+; SFB-NEXT: .LBB1_2:
+; SFB-NEXT: ret
+ %c = icmp eq i32 %z, 0
+ %b = select i1 %c, i32 %y, i32 %x
+ ret i32 %b
+}
+
+; Make sure we don't share the same basic block for two selects with the same
+; condition this would break the predication.
+define signext i32 @test3(i32 signext %v, i32 signext %w, i32 signext %x, i32 signext %y, i32 signext %z) {
+; NOSFB-LABEL: test3:
+; NOSFB: # %bb.0:
+; NOSFB-NEXT: beqz a4, .LBB2_2
+; NOSFB-NEXT: # %bb.1:
+; NOSFB-NEXT: mv a1, a0
+; NOSFB-NEXT: mv a2, a3
+; NOSFB-NEXT: .LBB2_2:
+; NOSFB-NEXT: addw a0, a1, a2
+; NOSFB-NEXT: ret
+;
+; SFB-LABEL: test3:
+; SFB: # %bb.0:
+; SFB-NEXT: bnez a4, .LBB2_2
+; SFB-NEXT: # %bb.1:
+; SFB-NEXT: mv a0, a1
+; SFB-NEXT: .LBB2_2:
+; SFB-NEXT: beqz a4, .LBB2_4
+; SFB-NEXT: # %bb.3:
+; SFB-NEXT: mv a2, a3
+; SFB-NEXT: .LBB2_4:
+; SFB-NEXT: addw a0, a0, a2
+; SFB-NEXT: ret
+ %c = icmp eq i32 %z, 0
+ %a = select i1 %c, i32 %w, i32 %v
+ %b = select i1 %c, i32 %x, i32 %y
+ %d = add i32 %a, %b
+ ret i32 %d
+}
+
+; Test with false value 0.
+define signext i32 @test4(i32 signext %x, i32 signext %z) {
+; NOSFB-LABEL: test4:
+; NOSFB: # %bb.0:
+; NOSFB-NEXT: snez a1, a1
+; NOSFB-NEXT: addi a1, a1, -1
+; NOSFB-NEXT: and a0, a0, a1
+; NOSFB-NEXT: ret
+;
+; SFB-LABEL: test4:
+; SFB: # %bb.0:
+; SFB-NEXT: beqz a1, .LBB3_2
+; SFB-NEXT: # %bb.1:
+; SFB-NEXT: li a0, 0
+; SFB-NEXT: .LBB3_2:
+; SFB-NEXT: ret
+ %c = icmp eq i32 %z, 0
+ %b = select i1 %c, i32 %x, i32 0
+ ret i32 %b
+}
+
+; Same as above with select operands swapped.
+define signext i32 @test5(i32 signext %x, i32 signext %z) {
+; NOSFB-LABEL: test5:
+; NOSFB: # %bb.0:
+; NOSFB-NEXT: seqz a1, a1
+; NOSFB-NEXT: addi a1, a1, -1
+; NOSFB-NEXT: and a0, a0, a1
+; NOSFB-NEXT: ret
+;
+; SFB-LABEL: test5:
+; SFB: # %bb.0:
+; SFB-NEXT: bnez a1, .LBB4_2
+; SFB-NEXT: # %bb.1:
+; SFB-NEXT: li a0, 0
+; SFB-NEXT: .LBB4_2:
+; SFB-NEXT: ret
+ %c = icmp eq i32 %z, 0
+ %b = select i1 %c, i32 0, i32 %x
+ ret i32 %b
+}
+
+; Test with false value -1.
+define signext i32 @test6(i32 signext %x, i32 signext %z) {
+; NOSFB-LABEL: test6:
+; NOSFB: # %bb.0:
+; NOSFB-NEXT: seqz a1, a1
+; NOSFB-NEXT: addi a1, a1, -1
+; NOSFB-NEXT: or a0, a0, a1
+; NOSFB-NEXT: ret
+;
+; SFB-LABEL: test6:
+; SFB: # %bb.0:
+; SFB-NEXT: li a2, -1
+; SFB-NEXT: beqz a1, .LBB5_2
+; SFB-NEXT: # %bb.1:
+; SFB-NEXT: mv a0, a2
+; SFB-NEXT: .LBB5_2:
+; SFB-NEXT: ret
+ %c = icmp eq i32 %z, 0
+ %b = select i1 %c, i32 %x, i32 -1
+ ret i32 %b
+}
+
+; Same as above with select operands swapped.
+define signext i32 @test7(i32 signext %x, i32 signext %z) {
+; NOSFB-LABEL: test7:
+; NOSFB: # %bb.0:
+; NOSFB-NEXT: snez a1, a1
+; NOSFB-NEXT: addi a1, a1, -1
+; NOSFB-NEXT: or a0, a0, a1
+; NOSFB-NEXT: ret
+;
+; SFB-LABEL: test7:
+; SFB: # %bb.0:
+; SFB-NEXT: li a2, -1
+; SFB-NEXT: bnez a1, .LBB6_2
+; SFB-NEXT: # %bb.1:
+; SFB-NEXT: mv a0, a2
+; SFB-NEXT: .LBB6_2:
+; SFB-NEXT: ret
+ %c = icmp eq i32 %z, 0
+ %b = select i1 %c, i32 -1, i32 %x
+ ret i32 %b
+}
+
+define i16 @select_xor_1(i16 %A, i8 %cond) {
+; NOSFB-LABEL: select_xor_1:
+; NOSFB: # %bb.0: # %entry
+; NOSFB-NEXT: andi a1, a1, 1
+; NOSFB-NEXT: negw a1, a1
+; NOSFB-NEXT: andi a1, a1, 43
+; NOSFB-NEXT: xor a0, a0, a1
+; NOSFB-NEXT: ret
+;
+; SFB-LABEL: select_xor_1:
+; SFB: # %bb.0: # %entry
+; SFB-NEXT: andi a1, a1, 1
+; SFB-NEXT: xori a2, a0, 43
+; SFB-NEXT: beqz a1, .LBB7_2
+; SFB-NEXT: # %bb.1: # %entry
+; SFB-NEXT: mv a0, a2
+; SFB-NEXT: .LBB7_2: # %entry
+; SFB-NEXT: ret
+entry:
+ %and = and i8 %cond, 1
+ %cmp10 = icmp eq i8 %and, 0
+ %0 = xor i16 %A, 43
+ %1 = select i1 %cmp10, i16 %A, i16 %0
+ ret i16 %1
+}
+
+; Equivalent to above, but with icmp ne (and %cond, 1), 1 instead of
+; icmp eq (and %cond, 1), 0
+define i16 @select_xor_1b(i16 %A, i8 %cond) {
+; NOSFB-LABEL: select_xor_1b:
+; NOSFB: # %bb.0: # %entry
+; NOSFB-NEXT: andi a1, a1, 1
+; NOSFB-NEXT: negw a1, a1
+; NOSFB-NEXT: andi a1, a1, 43
+; NOSFB-NEXT: xor a0, a0, a1
+; NOSFB-NEXT: ret
+;
+; SFB-LABEL: select_xor_1b:
+; SFB: # %bb.0: # %entry
+; SFB-NEXT: andi a1, a1, 1
+; SFB-NEXT: xori a2, a0, 43
+; SFB-NEXT: beqz a1, .LBB8_2
+; SFB-NEXT: # %bb.1: # %entry
+; SFB-NEXT: mv a0, a2
+; SFB-NEXT: .LBB8_2: # %entry
+; SFB-NEXT: ret
+entry:
+ %and = and i8 %cond, 1
+ %cmp10 = icmp ne i8 %and, 1
+ %0 = xor i16 %A, 43
+ %1 = select i1 %cmp10, i16 %A, i16 %0
+ ret i16 %1
+}
+
+define i32 @select_xor_2(i32 %A, i32 %B, i8 %cond) {
+; NOSFB-LABEL: select_xor_2:
+; NOSFB: # %bb.0: # %entry
+; NOSFB-NEXT: andi a2, a2, 1
+; NOSFB-NEXT: neg a2, a2
+; NOSFB-NEXT: and a1, a1, a2
+; NOSFB-NEXT: xor a0, a0, a1
+; NOSFB-NEXT: ret
+;
+; SFB-LABEL: select_xor_2:
+; SFB: # %bb.0: # %entry
+; SFB-NEXT: andi a2, a2, 1
+; SFB-NEXT: xor a1, a1, a0
+; SFB-NEXT: beqz a2, .LBB9_2
+; SFB-NEXT: # %bb.1: # %entry
+; SFB-NEXT: mv a0, a1
+; SFB-NEXT: .LBB9_2: # %entry
+; SFB-NEXT: ret
+entry:
+ %and = and i8 %cond, 1
+ %cmp10 = icmp eq i8 %and, 0
+ %0 = xor i32 %B, %A
+ %1 = select i1 %cmp10, i32 %A, i32 %0
+ ret i32 %1
+}
+
+; Equivalent to above, but with icmp ne (and %cond, 1), 1 instead of
+; icmp eq (and %cond, 1), 0
+define i32 @select_xor_2b(i32 %A, i32 %B, i8 %cond) {
+; NOSFB-LABEL: select_xor_2b:
+; NOSFB: # %bb.0: # %entry
+; NOSFB-NEXT: andi a2, a2, 1
+; NOSFB-NEXT: neg a2, a2
+; NOSFB-NEXT: and a1, a1, a2
+; NOSFB-NEXT: xor a0, a0, a1
+; NOSFB-NEXT: ret
+;
+; SFB-LABEL: select_xor_2b:
+; SFB: # %bb.0: # %entry
+; SFB-NEXT: andi a2, a2, 1
+; SFB-NEXT: xor a1, a1, a0
+; SFB-NEXT: beqz a2, .LBB10_2
+; SFB-NEXT: # %bb.1: # %entry
+; SFB-NEXT: mv a0, a1
+; SFB-NEXT: .LBB10_2: # %entry
+; SFB-NEXT: ret
+entry:
+ %and = and i8 %cond, 1
+ %cmp10 = icmp ne i8 %and, 1
+ %0 = xor i32 %B, %A
+ %1 = select i1 %cmp10, i32 %A, i32 %0
+ ret i32 %1
+}
+
+define i32 @select_or(i32 %A, i32 %B, i8 %cond) {
+; NOSFB-LABEL: select_or:
+; NOSFB: # %bb.0: # %entry
+; NOSFB-NEXT: andi a2, a2, 1
+; NOSFB-NEXT: neg a2, a2
+; NOSFB-NEXT: and a1, a1, a2
+; NOSFB-NEXT: or a0, a0, a1
+; NOSFB-NEXT: ret
+;
+; SFB-LABEL: select_or:
+; SFB: # %bb.0: # %entry
+; SFB-NEXT: andi a2, a2, 1
+; SFB-NEXT: or a1, a1, a0
+; SFB-NEXT: beqz a2, .LBB11_2
+; SFB-NEXT: # %bb.1: # %entry
+; SFB-NEXT: mv a0, a1
+; SFB-NEXT: .LBB11_2: # %entry
+; SFB-NEXT: ret
+entry:
+ %and = and i8 %cond, 1
+ %cmp10 = icmp eq i8 %and, 0
+ %0 = or i32 %B, %A
+ %1 = select i1 %cmp10, i32 %A, i32 %0
+ ret i32 %1
+}
+
+; Equivalent to above, but with icmp ne (and %cond, 1), 1 instead of
+; icmp eq (and %cond, 1), 0
+define i32 @select_or_b(i32 %A, i32 %B, i8 %cond) {
+; NOSFB-LABEL: select_or_b:
+; NOSFB: # %bb.0: # %entry
+; NOSFB-NEXT: andi a2, a2, 1
+; NOSFB-NEXT: neg a2, a2
+; NOSFB-NEXT: and a1, a1, a2
+; NOSFB-NEXT: or a0, a0, a1
+; NOSFB-NEXT: ret
+;
+; SFB-LABEL: select_or_b:
+; SFB: # %bb.0: # %entry
+; SFB-NEXT: andi a2, a2, 1
+; SFB-NEXT: or a1, a1, a0
+; SFB-NEXT: beqz a2, .LBB12_2
+; SFB-NEXT: # %bb.1: # %entry
+; SFB-NEXT: mv a0, a1
+; SFB-NEXT: .LBB12_2: # %entry
+; SFB-NEXT: ret
+entry:
+ %and = and i8 %cond, 1
+ %cmp10 = icmp ne i8 %and, 1
+ %0 = or i32 %B, %A
+ %1 = select i1 %cmp10, i32 %A, i32 %0
+ ret i32 %1
+}
+
+define i32 @select_or_1(i32 %A, i32 %B, i32 %cond) {
+; NOSFB-LABEL: select_or_1:
+; NOSFB: # %bb.0: # %entry
+; NOSFB-NEXT: andi a2, a2, 1
+; NOSFB-NEXT: neg a2, a2
+; NOSFB-NEXT: and a1, a1, a2
+; NOSFB-NEXT: or a0, a0, a1
+; NOSFB-NEXT: ret
+;
+; SFB-LABEL: select_or_1:
+; SFB: # %bb.0: # %entry
+; SFB-NEXT: andi a2, a2, 1
+; SFB-NEXT: or a1, a1, a0
+; SFB-NEXT: beqz a2, .LBB13_2
+; SFB-NEXT: # %bb.1: # %entry
+; SFB-NEXT: mv a0, a1
+; SFB-NEXT: .LBB13_2: # %entry
+; SFB-NEXT: ret
+entry:
+ %and = and i32 %cond, 1
+ %cmp10 = icmp eq i32 %and, 0
+ %0 = or i32 %B, %A
+ %1 = select i1 %cmp10, i32 %A, i32 %0
+ ret i32 %1
+}
+
+; Equivalent to above, but with icmp ne (and %cond, 1), 1 instead of
+; icmp eq (and %cond, 1), 0
+define i32 @select_or_1b(i32 %A, i32 %B, i32 %cond) {
+; NOSFB-LABEL: select_or_1b:
+; NOSFB: # %bb.0: # %entry
+; NOSFB-NEXT: andi a2, a2, 1
+; NOSFB-NEXT: neg a2, a2
+; NOSFB-NEXT: and a1, a1, a2
+; NOSFB-NEXT: or a0, a0, a1
+; NOSFB-NEXT: ret
+;
+; SFB-LABEL: select_or_1b:
+; SFB: # %bb.0: # %entry
+; SFB-NEXT: andi a2, a2, 1
+; SFB-NEXT: or a1, a1, a0
+; SFB-NEXT: beqz a2, .LBB14_2
+; SFB-NEXT: # %bb.1: # %entry
+; SFB-NEXT: mv a0, a1
+; SFB-NEXT: .LBB14_2: # %entry
+; SFB-NEXT: ret
+entry:
+ %and = and i32 %cond, 1
+ %cmp10 = icmp ne i32 %and, 1
+ %0 = or i32 %B, %A
+ %1 = select i1 %cmp10, i32 %A, i32 %0
+ ret i32 %1
+}
More information about the llvm-commits
mailing list