[llvm] 2b32e4f - [RISCV] Add basic support for the sifive-7-series short forward branch optimization.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 17 13:57:15 PDT 2022
Author: Craig Topper
Date: 2022-10-17T13:56:22-07:00
New Revision: 2b32e4f98b4f0e9e6a7c301b627ff75d9af8699e
URL: https://github.com/llvm/llvm-project/commit/2b32e4f98b4f0e9e6a7c301b627ff75d9af8699e
DIFF: https://github.com/llvm/llvm-project/commit/2b32e4f98b4f0e9e6a7c301b627ff75d9af8699e.diff
LOG: [RISCV] Add basic support for the sifive-7-series short forward branch optimization.
sifive-7-series has macrofusion support to convert a branch over
a single instruction into a conditional instruction. This can be
an improvement if the branch is hard to predict.
This patch adds support for the most basic case, a branch over a
move instruction. This is implemented as a pseudo instruction so
we can hide the control flow until all code motion passes complete.
I've disabled a recent select optimization when this feature is enabled
in the subtarget.
Related gcc patch for the same optimization https://www.mail-archive.com/gcc-patches@gcc.gnu.org/msg211045.html
Reviewed By: reames
Differential Revision: https://reviews.llvm.org/D135814
Added:
llvm/test/CodeGen/RISCV/short-foward-branch-opt.ll
Modified:
llvm/lib/Target/RISCV/RISCV.td
llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
llvm/lib/Target/RISCV/RISCVInstrInfo.td
llvm/lib/Target/RISCV/RISCVSchedRocket.td
llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
llvm/lib/Target/RISCV/RISCVSchedule.td
llvm/lib/Target/RISCV/RISCVSubtarget.h
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td
index 438c614d259ce..822b2c44c2309 100644
--- a/llvm/lib/Target/RISCV/RISCV.td
+++ b/llvm/lib/Target/RISCV/RISCV.td
@@ -449,9 +449,18 @@ def TuneNoDefaultUnroll
: SubtargetFeature<"no-default-unroll", "EnableDefaultUnroll", "false",
"Disable default unroll preference.">;
+// SiFive 7 is able to fuse integer ALU operations with a preceding branch
+// instruction.
+def TuneShortForwardBranchOpt
+ : SubtargetFeature<"short-forward-branch-opt", "HasShortForwardBranchOpt",
+ "true", "Enable short forward branch optimization">;
+def HasShortForwardBranchOpt : Predicate<"Subtarget->hasShortForwardBranchOpt()">;
+def NoShortForwardBranchOpt : Predicate<"!Subtarget->hasShortForwardBranchOpt()">;
+
def TuneSiFive7 : SubtargetFeature<"sifive7", "RISCVProcFamily", "SiFive7",
"SiFive 7-Series processors",
- [TuneNoDefaultUnroll]>;
+ [TuneNoDefaultUnroll,
+ TuneShortForwardBranchOpt]>;
// Assume that lock-free native-width atomics are available, even if the target
// and operating system combination would not usually provide them. The user
diff --git a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
index b5d5ffee7b857..83ec165474a46 100644
--- a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
@@ -45,6 +45,8 @@ class RISCVExpandPseudo : public MachineFunctionPass {
bool expandMBB(MachineBasicBlock &MBB);
bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
MachineBasicBlock::iterator &NextMBBI);
+ bool expandCCOp(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI);
bool expandVSetVL(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
bool expandVMSET_VMCLR(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, unsigned Opcode);
@@ -82,6 +84,8 @@ bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB,
// expanded instructions for each pseudo is correct in the Size field of the
// tablegen definition for the pseudo.
switch (MBBI->getOpcode()) {
+ case RISCV::PseudoCCMOVGPR:
+ return expandCCOp(MBB, MBBI, NextMBBI);
case RISCV::PseudoVSETVLI:
case RISCV::PseudoVSETVLIX0:
case RISCV::PseudoVSETIVLI:
@@ -133,6 +137,60 @@ bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB,
return false;
}
+bool RISCVExpandPseudo::expandCCOp(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI) {
+ assert(MBBI->getOpcode() == RISCV::PseudoCCMOVGPR && "Unexpected opcode");
+
+ MachineFunction *MF = MBB.getParent();
+ MachineInstr &MI = *MBBI;
+ DebugLoc DL = MI.getDebugLoc();
+
+ MachineBasicBlock *TrueBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+ MachineBasicBlock *MergeBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+
+ MF->insert(++MBB.getIterator(), TrueBB);
+ MF->insert(++TrueBB->getIterator(), MergeBB);
+
+ // We want to copy the "true" value when the condition is true which means
+ // we need to invert the branch condition to jump over TrueBB when the
+ // condition is false.
+ auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
+ CC = RISCVCC::getOppositeBranchCondition(CC);
+
+ // Insert branch instruction.
+ BuildMI(MBB, MBBI, DL, TII->getBrCond(CC))
+ .addReg(MI.getOperand(1).getReg())
+ .addReg(MI.getOperand(2).getReg())
+ .addMBB(MergeBB);
+
+ Register DestReg = MI.getOperand(0).getReg();
+ assert(MI.getOperand(4).getReg() == DestReg);
+
+ // Add MV.
+ BuildMI(TrueBB, DL, TII->get(RISCV::ADDI), DestReg)
+ .add(MI.getOperand(5))
+ .addImm(0);
+
+ TrueBB->addSuccessor(MergeBB);
+
+ MergeBB->splice(MergeBB->end(), &MBB, MI, MBB.end());
+ MergeBB->transferSuccessors(&MBB);
+
+ MBB.addSuccessor(TrueBB);
+ MBB.addSuccessor(MergeBB);
+
+ NextMBBI = MBB.end();
+ MI.eraseFromParent();
+
+ // Make sure live-ins are correctly attached to this new basic block.
+ LivePhysRegs LiveRegs;
+ computeAndAddLiveIns(LiveRegs, *TrueBB);
+ computeAndAddLiveIns(LiveRegs, *MergeBB);
+
+ return true;
+}
+
bool RISCVExpandPseudo::expandVSetVL(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI) {
assert(MBBI->getNumExplicitOperands() == 3 && MBBI->getNumOperands() >= 5 &&
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 874fe95c0a95b..0670b89539941 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -4222,28 +4222,30 @@ SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV);
}
- // (select c, -1, y) -> -c | y
- if (isAllOnesConstant(TrueV)) {
- SDValue Neg = DAG.getNegative(CondV, DL, VT);
- return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
- }
- // (select c, y, -1) -> (c-1) | y
- if (isAllOnesConstant(FalseV)) {
- SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
- DAG.getAllOnesConstant(DL, VT));
- return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
- }
+ if (!Subtarget.hasShortForwardBranchOpt()) {
+ // (select c, -1, y) -> -c | y
+ if (isAllOnesConstant(TrueV)) {
+ SDValue Neg = DAG.getNegative(CondV, DL, VT);
+ return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
+ }
+ // (select c, y, -1) -> (c-1) | y
+ if (isAllOnesConstant(FalseV)) {
+ SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
+ DAG.getAllOnesConstant(DL, VT));
+ return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
+ }
- // (select c, 0, y) -> (c-1) & y
- if (isNullConstant(TrueV)) {
- SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
- DAG.getAllOnesConstant(DL, VT));
- return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
- }
- // (select c, y, 0) -> -c & y
- if (isNullConstant(FalseV)) {
- SDValue Neg = DAG.getNegative(CondV, DL, VT);
- return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
+ // (select c, 0, y) -> (c-1) & y
+ if (isNullConstant(TrueV)) {
+ SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
+ DAG.getAllOnesConstant(DL, VT));
+ return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
+ }
+ // (select c, y, 0) -> -c & y
+ if (isNullConstant(FalseV)) {
+ SDValue Neg = DAG.getNegative(CondV, DL, VT);
+ return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
+ }
}
// If the CondV is the output of a SETCC node which operates on XLenVT inputs,
@@ -9450,9 +9452,11 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
// (select (x in [0,1] != 0), (z ^ y), y ) -> (-x & z ) ^ y
// (select (x in [0,1] == 0), y, (z | y) ) -> (-x & z ) | y
// (select (x in [0,1] != 0), (z | y), y ) -> (-x & z ) | y
+ // NOTE: We only do this if the target does not have the short forward
+ // branch optimization.
APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
- if (isNullConstant(RHS) && ISD::isIntEqualitySetCC(CCVal) &&
- DAG.MaskedValueIsZero(LHS, Mask)) {
+ if (!Subtarget.hasShortForwardBranchOpt() && isNullConstant(RHS) &&
+ ISD::isIntEqualitySetCC(CCVal) && DAG.MaskedValueIsZero(LHS, Mask)) {
unsigned Opcode;
SDValue Src1, Src2;
// true if FalseV is XOR or OR operator and one of its operands
@@ -9504,35 +9508,36 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
{LHS, RHS, CC, TrueV, FalseV});
- // (select c, -1, y) -> -c | y
- if (isAllOnesConstant(TrueV)) {
- SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
- SDValue Neg = DAG.getNegative(C, DL, VT);
- return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
- }
- // (select c, y, -1) -> -!c | y
- if (isAllOnesConstant(FalseV)) {
- SDValue C = DAG.getSetCC(DL, VT, LHS, RHS,
- ISD::getSetCCInverse(CCVal, VT));
- SDValue Neg = DAG.getNegative(C, DL, VT);
- return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
- }
+ if (!Subtarget.hasShortForwardBranchOpt()) {
+ // (select c, -1, y) -> -c | y
+ if (isAllOnesConstant(TrueV)) {
+ SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
+ SDValue Neg = DAG.getNegative(C, DL, VT);
+ return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
+ }
+ // (select c, y, -1) -> -!c | y
+ if (isAllOnesConstant(FalseV)) {
+ SDValue C =
+ DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
+ SDValue Neg = DAG.getNegative(C, DL, VT);
+ return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
+ }
- // (select c, 0, y) -> -!c & y
- if (isNullConstant(TrueV)) {
- SDValue C = DAG.getSetCC(DL, VT, LHS, RHS,
- ISD::getSetCCInverse(CCVal, VT));
- SDValue Neg = DAG.getNegative(C, DL, VT);
- return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
- }
- // (select c, y, 0) -> -c & y
- if (isNullConstant(FalseV)) {
- SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
- SDValue Neg = DAG.getNegative(C, DL, VT);
- return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
+ // (select c, 0, y) -> -!c & y
+ if (isNullConstant(TrueV)) {
+ SDValue C =
+ DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
+ SDValue Neg = DAG.getNegative(C, DL, VT);
+ return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
+ }
+ // (select c, y, 0) -> -c & y
+ if (isNullConstant(FalseV)) {
+ SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
+ SDValue Neg = DAG.getNegative(C, DL, VT);
+ return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
+ }
}
-
return SDValue();
}
case RISCVISD::BR_CC: {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 05b0c016c943c..7ae0ceee9aad0 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -1616,6 +1616,9 @@ bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI,
return false;
switch (MI.getOpcode()) {
+ case RISCV::PseudoCCMOVGPR:
+ // Operands 4 and 5 are commutable.
+ return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 4, 5);
case CASE_VFMA_SPLATS(FMADD):
case CASE_VFMA_SPLATS(FMSUB):
case CASE_VFMA_SPLATS(FMACC):
@@ -1761,6 +1764,15 @@ MachineInstr *RISCVInstrInfo::commuteInstructionImpl(MachineInstr &MI,
};
switch (MI.getOpcode()) {
+ case RISCV::PseudoCCMOVGPR: {
+ // CCMOV can be commuted by inverting the condition.
+ auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
+ CC = RISCVCC::getOppositeBranchCondition(CC);
+ auto &WorkingMI = cloneIfNew(MI);
+ WorkingMI.getOperand(3).setImm(CC);
+ return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI*/ false,
+ OpIdx1, OpIdx2);
+ }
case CASE_VFMA_SPLATS(FMACC):
case CASE_VFMA_SPLATS(FMADD):
case CASE_VFMA_SPLATS(FMSAC):
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index 4c8d54cc2c4ed..fb3bcf429d9a0 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -1313,6 +1313,20 @@ def riscv_selectcc_frag : PatFrag<(ops node:$lhs, node:$rhs, node:$cc,
node:$falsev), [{}],
IntCCtoRISCVCC>;
+let Predicates = [HasShortForwardBranchOpt],
+ Constraints = "$dst = $falsev", isCommutable = 1, Size = 8 in {
+// This instruction moves $truev to $dst when the condition is true. It will
+// be expanded to control flow in RISCVExpandPseudoInsts.
+def PseudoCCMOVGPR : Pseudo<(outs GPR:$dst),
+ (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+ GPR:$falsev, GPR:$truev),
+ [(set GPR:$dst,
+ (riscv_selectcc_frag:$cc GPR:$lhs, GPR:$rhs,
+ cond, GPR:$truev,
+ GPR:$falsev))]>,
+ Sched<[WriteSFB, ReadSFB, ReadSFB, ReadSFB, ReadSFB]>;
+}
+
multiclass SelectCC_GPR_rrirr<RegisterClass valty> {
let usesCustomInserter = 1 in
def _Using_CC_GPR : Pseudo<(outs valty:$dst),
@@ -1329,6 +1343,7 @@ multiclass SelectCC_GPR_rrirr<RegisterClass valty> {
(IntCCtoRISCVCC $cc), valty:$truev, valty:$falsev)>;
}
+let Predicates = [NoShortForwardBranchOpt] in
defm Select_GPR : SelectCC_GPR_rrirr<GPR>;
class SelectCompressOpt<CondCode Cond>: Pat<(riscv_selectcc_frag:$select GPR:$lhs, simm12_no6:$Constant, Cond,
diff --git a/llvm/lib/Target/RISCV/RISCVSchedRocket.td b/llvm/lib/Target/RISCV/RISCVSchedRocket.td
index 4bfd352edca3d..e39585ff08412 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedRocket.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedRocket.td
@@ -246,4 +246,5 @@ defm : UnsupportedSchedZbs;
defm : UnsupportedSchedZbkb;
defm : UnsupportedSchedZbkx;
defm : UnsupportedSchedZfh;
+defm : UnsupportedSchedSFB;
}
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
index 32e2b1f6d1e4e..17df9e212eb81 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
@@ -44,6 +44,12 @@ def : WriteRes<WriteJal, [SiFive7PipeB]>;
def : WriteRes<WriteJalr, [SiFive7PipeB]>;
def : WriteRes<WriteJmpReg, [SiFive7PipeB]>;
+//Short forward branch
+def : WriteRes<WriteSFB, [SiFive7PipeA, SiFive7PipeB]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+
// Integer arithmetic and logic
let Latency = 3 in {
def : WriteRes<WriteIALU, [SiFive7PipeAB]>;
@@ -223,6 +229,8 @@ def : ReadAdvance<ReadFMovI64ToF64, 0>;
def : ReadAdvance<ReadFClass32, 0>;
def : ReadAdvance<ReadFClass64, 0>;
+def : ReadAdvance<ReadSFB, 0>;
+
//===----------------------------------------------------------------------===//
// Unsupported extensions
defm : UnsupportedSchedV;
diff --git a/llvm/lib/Target/RISCV/RISCVSchedule.td b/llvm/lib/Target/RISCV/RISCVSchedule.td
index 8544e0012a767..0437f78c3dafd 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedule.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedule.td
@@ -105,6 +105,10 @@ def WriteFST16 : SchedWrite; // Floating point sp store
def WriteFST32 : SchedWrite; // Floating point sp store
def WriteFST64 : SchedWrite; // Floating point dp store
+// short forward branch for Bullet
+def WriteSFB : SchedWrite;
+def ReadSFB : SchedRead;
+
/// Define scheduler resources associated with use operands.
def ReadJmp : SchedRead;
def ReadJalr : SchedRead;
@@ -229,6 +233,14 @@ def : ReadAdvance<ReadFSqrt16, 0>;
} // Unsupported = true
}
+multiclass UnsupportedSchedSFB {
+let Unsupported = true in {
+def : WriteRes<WriteSFB, []>;
+
+def : ReadAdvance<ReadSFB, 0>;
+} // Unsupported = true
+}
+
// Include the scheduler resources for other instruction extensions.
include "RISCVScheduleZb.td"
include "RISCVScheduleV.td"
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
index 786194609e12c..89fb7cb212a87 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -96,6 +96,7 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
bool EnableDefaultUnroll = true;
bool EnableSaveRestore = false;
bool EnableUnalignedScalarMem = false;
+ bool HasShortForwardBranchOpt = false;
bool HasLUIADDIFusion = false;
bool HasForcedAtomics = false;
unsigned XLen = 32;
@@ -190,6 +191,7 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
bool enableRVCHintInstrs() const { return EnableRVCHintInstrs; }
bool enableDefaultUnroll() const { return EnableDefaultUnroll; }
bool enableSaveRestore() const { return EnableSaveRestore; }
+ bool hasShortForwardBranchOpt() const { return HasShortForwardBranchOpt; }
bool enableUnalignedScalarMem() const { return EnableUnalignedScalarMem; }
bool hasLUIADDIFusion() const { return HasLUIADDIFusion; }
bool hasForcedAtomics() const { return HasForcedAtomics; }
diff --git a/llvm/test/CodeGen/RISCV/short-foward-branch-opt.ll b/llvm/test/CodeGen/RISCV/short-foward-branch-opt.ll
new file mode 100644
index 0000000000000..ccb4c99837ec8
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/short-foward-branch-opt.ll
@@ -0,0 +1,385 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+c -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=NOSFB %s
+; RUN: llc -mtriple=riscv64 -mcpu=sifive-u74 -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=SFB %s
+
+; The sifive-7-series can predicate a mv.
+
+define signext i32 @test1(i32 signext %x, i32 signext %y, i32 signext %z) {
+; NOSFB-LABEL: test1:
+; NOSFB: # %bb.0:
+; NOSFB-NEXT: beqz a2, .LBB0_2
+; NOSFB-NEXT: # %bb.1:
+; NOSFB-NEXT: mv a0, a1
+; NOSFB-NEXT: .LBB0_2:
+; NOSFB-NEXT: ret
+;
+; SFB-LABEL: test1:
+; SFB: # %bb.0:
+; SFB-NEXT: beqz a2, .LBB0_2
+; SFB-NEXT: # %bb.1:
+; SFB-NEXT: mv a0, a1
+; SFB-NEXT: .LBB0_2:
+; SFB-NEXT: ret
+ %c = icmp eq i32 %z, 0
+ %b = select i1 %c, i32 %x, i32 %y
+ ret i32 %b
+}
+
+; Same as above with select operands swapped.
+define signext i32 @test2(i32 signext %x, i32 signext %y, i32 signext %z) {
+; NOSFB-LABEL: test2:
+; NOSFB: # %bb.0:
+; NOSFB-NEXT: beqz a2, .LBB1_2
+; NOSFB-NEXT: # %bb.1:
+; NOSFB-NEXT: mv a1, a0
+; NOSFB-NEXT: .LBB1_2:
+; NOSFB-NEXT: mv a0, a1
+; NOSFB-NEXT: ret
+;
+; SFB-LABEL: test2:
+; SFB: # %bb.0:
+; SFB-NEXT: bnez a2, .LBB1_2
+; SFB-NEXT: # %bb.1:
+; SFB-NEXT: mv a0, a1
+; SFB-NEXT: .LBB1_2:
+; SFB-NEXT: ret
+ %c = icmp eq i32 %z, 0
+ %b = select i1 %c, i32 %y, i32 %x
+ ret i32 %b
+}
+
+; Make sure we don't share the same basic block for two selects with the same
+; condition this would break the predication.
+define signext i32 @test3(i32 signext %v, i32 signext %w, i32 signext %x, i32 signext %y, i32 signext %z) {
+; NOSFB-LABEL: test3:
+; NOSFB: # %bb.0:
+; NOSFB-NEXT: beqz a4, .LBB2_2
+; NOSFB-NEXT: # %bb.1:
+; NOSFB-NEXT: mv a1, a0
+; NOSFB-NEXT: mv a2, a3
+; NOSFB-NEXT: .LBB2_2:
+; NOSFB-NEXT: addw a0, a1, a2
+; NOSFB-NEXT: ret
+;
+; SFB-LABEL: test3:
+; SFB: # %bb.0:
+; SFB-NEXT: bnez a4, .LBB2_2
+; SFB-NEXT: # %bb.1:
+; SFB-NEXT: mv a0, a1
+; SFB-NEXT: .LBB2_2:
+; SFB-NEXT: beqz a4, .LBB2_4
+; SFB-NEXT: # %bb.3:
+; SFB-NEXT: mv a2, a3
+; SFB-NEXT: .LBB2_4:
+; SFB-NEXT: addw a0, a0, a2
+; SFB-NEXT: ret
+ %c = icmp eq i32 %z, 0
+ %a = select i1 %c, i32 %w, i32 %v
+ %b = select i1 %c, i32 %x, i32 %y
+ %d = add i32 %a, %b
+ ret i32 %d
+}
+
+; Test with false value 0.
+define signext i32 @test4(i32 signext %x, i32 signext %z) {
+; NOSFB-LABEL: test4:
+; NOSFB: # %bb.0:
+; NOSFB-NEXT: snez a1, a1
+; NOSFB-NEXT: addi a1, a1, -1
+; NOSFB-NEXT: and a0, a0, a1
+; NOSFB-NEXT: ret
+;
+; SFB-LABEL: test4:
+; SFB: # %bb.0:
+; SFB-NEXT: beqz a1, .LBB3_2
+; SFB-NEXT: # %bb.1:
+; SFB-NEXT: li a0, 0
+; SFB-NEXT: .LBB3_2:
+; SFB-NEXT: ret
+ %c = icmp eq i32 %z, 0
+ %b = select i1 %c, i32 %x, i32 0
+ ret i32 %b
+}
+
+; Same as above with select operands swapped.
+define signext i32 @test5(i32 signext %x, i32 signext %z) {
+; NOSFB-LABEL: test5:
+; NOSFB: # %bb.0:
+; NOSFB-NEXT: seqz a1, a1
+; NOSFB-NEXT: addi a1, a1, -1
+; NOSFB-NEXT: and a0, a0, a1
+; NOSFB-NEXT: ret
+;
+; SFB-LABEL: test5:
+; SFB: # %bb.0:
+; SFB-NEXT: bnez a1, .LBB4_2
+; SFB-NEXT: # %bb.1:
+; SFB-NEXT: li a0, 0
+; SFB-NEXT: .LBB4_2:
+; SFB-NEXT: ret
+ %c = icmp eq i32 %z, 0
+ %b = select i1 %c, i32 0, i32 %x
+ ret i32 %b
+}
+
+; Test with false value -1.
+define signext i32 @test6(i32 signext %x, i32 signext %z) {
+; NOSFB-LABEL: test6:
+; NOSFB: # %bb.0:
+; NOSFB-NEXT: seqz a1, a1
+; NOSFB-NEXT: addi a1, a1, -1
+; NOSFB-NEXT: or a0, a0, a1
+; NOSFB-NEXT: ret
+;
+; SFB-LABEL: test6:
+; SFB: # %bb.0:
+; SFB-NEXT: li a2, -1
+; SFB-NEXT: beqz a1, .LBB5_2
+; SFB-NEXT: # %bb.1:
+; SFB-NEXT: mv a0, a2
+; SFB-NEXT: .LBB5_2:
+; SFB-NEXT: ret
+ %c = icmp eq i32 %z, 0
+ %b = select i1 %c, i32 %x, i32 -1
+ ret i32 %b
+}
+
+; Same as above with select operands swapped.
+define signext i32 @test7(i32 signext %x, i32 signext %z) {
+; NOSFB-LABEL: test7:
+; NOSFB: # %bb.0:
+; NOSFB-NEXT: snez a1, a1
+; NOSFB-NEXT: addi a1, a1, -1
+; NOSFB-NEXT: or a0, a0, a1
+; NOSFB-NEXT: ret
+;
+; SFB-LABEL: test7:
+; SFB: # %bb.0:
+; SFB-NEXT: li a2, -1
+; SFB-NEXT: bnez a1, .LBB6_2
+; SFB-NEXT: # %bb.1:
+; SFB-NEXT: mv a0, a2
+; SFB-NEXT: .LBB6_2:
+; SFB-NEXT: ret
+ %c = icmp eq i32 %z, 0
+ %b = select i1 %c, i32 -1, i32 %x
+ ret i32 %b
+}
+
+define i16 @select_xor_1(i16 %A, i8 %cond) {
+; NOSFB-LABEL: select_xor_1:
+; NOSFB: # %bb.0: # %entry
+; NOSFB-NEXT: andi a1, a1, 1
+; NOSFB-NEXT: negw a1, a1
+; NOSFB-NEXT: andi a1, a1, 43
+; NOSFB-NEXT: xor a0, a0, a1
+; NOSFB-NEXT: ret
+;
+; SFB-LABEL: select_xor_1:
+; SFB: # %bb.0: # %entry
+; SFB-NEXT: andi a1, a1, 1
+; SFB-NEXT: xori a2, a0, 43
+; SFB-NEXT: beqz a1, .LBB7_2
+; SFB-NEXT: # %bb.1: # %entry
+; SFB-NEXT: mv a0, a2
+; SFB-NEXT: .LBB7_2: # %entry
+; SFB-NEXT: ret
+entry:
+ %and = and i8 %cond, 1
+ %cmp10 = icmp eq i8 %and, 0
+ %0 = xor i16 %A, 43
+ %1 = select i1 %cmp10, i16 %A, i16 %0
+ ret i16 %1
+}
+
+; Equivalent to above, but with icmp ne (and %cond, 1), 1 instead of
+; icmp eq (and %cond, 1), 0
+define i16 @select_xor_1b(i16 %A, i8 %cond) {
+; NOSFB-LABEL: select_xor_1b:
+; NOSFB: # %bb.0: # %entry
+; NOSFB-NEXT: andi a1, a1, 1
+; NOSFB-NEXT: negw a1, a1
+; NOSFB-NEXT: andi a1, a1, 43
+; NOSFB-NEXT: xor a0, a0, a1
+; NOSFB-NEXT: ret
+;
+; SFB-LABEL: select_xor_1b:
+; SFB: # %bb.0: # %entry
+; SFB-NEXT: andi a1, a1, 1
+; SFB-NEXT: xori a2, a0, 43
+; SFB-NEXT: beqz a1, .LBB8_2
+; SFB-NEXT: # %bb.1: # %entry
+; SFB-NEXT: mv a0, a2
+; SFB-NEXT: .LBB8_2: # %entry
+; SFB-NEXT: ret
+entry:
+ %and = and i8 %cond, 1
+ %cmp10 = icmp ne i8 %and, 1
+ %0 = xor i16 %A, 43
+ %1 = select i1 %cmp10, i16 %A, i16 %0
+ ret i16 %1
+}
+
+define i32 @select_xor_2(i32 %A, i32 %B, i8 %cond) {
+; NOSFB-LABEL: select_xor_2:
+; NOSFB: # %bb.0: # %entry
+; NOSFB-NEXT: andi a2, a2, 1
+; NOSFB-NEXT: neg a2, a2
+; NOSFB-NEXT: and a1, a1, a2
+; NOSFB-NEXT: xor a0, a0, a1
+; NOSFB-NEXT: ret
+;
+; SFB-LABEL: select_xor_2:
+; SFB: # %bb.0: # %entry
+; SFB-NEXT: andi a2, a2, 1
+; SFB-NEXT: xor a1, a1, a0
+; SFB-NEXT: beqz a2, .LBB9_2
+; SFB-NEXT: # %bb.1: # %entry
+; SFB-NEXT: mv a0, a1
+; SFB-NEXT: .LBB9_2: # %entry
+; SFB-NEXT: ret
+entry:
+ %and = and i8 %cond, 1
+ %cmp10 = icmp eq i8 %and, 0
+ %0 = xor i32 %B, %A
+ %1 = select i1 %cmp10, i32 %A, i32 %0
+ ret i32 %1
+}
+
+; Equivalent to above, but with icmp ne (and %cond, 1), 1 instead of
+; icmp eq (and %cond, 1), 0
+define i32 @select_xor_2b(i32 %A, i32 %B, i8 %cond) {
+; NOSFB-LABEL: select_xor_2b:
+; NOSFB: # %bb.0: # %entry
+; NOSFB-NEXT: andi a2, a2, 1
+; NOSFB-NEXT: neg a2, a2
+; NOSFB-NEXT: and a1, a1, a2
+; NOSFB-NEXT: xor a0, a0, a1
+; NOSFB-NEXT: ret
+;
+; SFB-LABEL: select_xor_2b:
+; SFB: # %bb.0: # %entry
+; SFB-NEXT: andi a2, a2, 1
+; SFB-NEXT: xor a1, a1, a0
+; SFB-NEXT: beqz a2, .LBB10_2
+; SFB-NEXT: # %bb.1: # %entry
+; SFB-NEXT: mv a0, a1
+; SFB-NEXT: .LBB10_2: # %entry
+; SFB-NEXT: ret
+entry:
+ %and = and i8 %cond, 1
+ %cmp10 = icmp ne i8 %and, 1
+ %0 = xor i32 %B, %A
+ %1 = select i1 %cmp10, i32 %A, i32 %0
+ ret i32 %1
+}
+
+define i32 @select_or(i32 %A, i32 %B, i8 %cond) {
+; NOSFB-LABEL: select_or:
+; NOSFB: # %bb.0: # %entry
+; NOSFB-NEXT: andi a2, a2, 1
+; NOSFB-NEXT: neg a2, a2
+; NOSFB-NEXT: and a1, a1, a2
+; NOSFB-NEXT: or a0, a0, a1
+; NOSFB-NEXT: ret
+;
+; SFB-LABEL: select_or:
+; SFB: # %bb.0: # %entry
+; SFB-NEXT: andi a2, a2, 1
+; SFB-NEXT: or a1, a1, a0
+; SFB-NEXT: beqz a2, .LBB11_2
+; SFB-NEXT: # %bb.1: # %entry
+; SFB-NEXT: mv a0, a1
+; SFB-NEXT: .LBB11_2: # %entry
+; SFB-NEXT: ret
+entry:
+ %and = and i8 %cond, 1
+ %cmp10 = icmp eq i8 %and, 0
+ %0 = or i32 %B, %A
+ %1 = select i1 %cmp10, i32 %A, i32 %0
+ ret i32 %1
+}
+
+; Equivalent to above, but with icmp ne (and %cond, 1), 1 instead of
+; icmp eq (and %cond, 1), 0
+define i32 @select_or_b(i32 %A, i32 %B, i8 %cond) {
+; NOSFB-LABEL: select_or_b:
+; NOSFB: # %bb.0: # %entry
+; NOSFB-NEXT: andi a2, a2, 1
+; NOSFB-NEXT: neg a2, a2
+; NOSFB-NEXT: and a1, a1, a2
+; NOSFB-NEXT: or a0, a0, a1
+; NOSFB-NEXT: ret
+;
+; SFB-LABEL: select_or_b:
+; SFB: # %bb.0: # %entry
+; SFB-NEXT: andi a2, a2, 1
+; SFB-NEXT: or a1, a1, a0
+; SFB-NEXT: beqz a2, .LBB12_2
+; SFB-NEXT: # %bb.1: # %entry
+; SFB-NEXT: mv a0, a1
+; SFB-NEXT: .LBB12_2: # %entry
+; SFB-NEXT: ret
+entry:
+ %and = and i8 %cond, 1
+ %cmp10 = icmp ne i8 %and, 1
+ %0 = or i32 %B, %A
+ %1 = select i1 %cmp10, i32 %A, i32 %0
+ ret i32 %1
+}
+
+define i32 @select_or_1(i32 %A, i32 %B, i32 %cond) {
+; NOSFB-LABEL: select_or_1:
+; NOSFB: # %bb.0: # %entry
+; NOSFB-NEXT: andi a2, a2, 1
+; NOSFB-NEXT: neg a2, a2
+; NOSFB-NEXT: and a1, a1, a2
+; NOSFB-NEXT: or a0, a0, a1
+; NOSFB-NEXT: ret
+;
+; SFB-LABEL: select_or_1:
+; SFB: # %bb.0: # %entry
+; SFB-NEXT: andi a2, a2, 1
+; SFB-NEXT: or a1, a1, a0
+; SFB-NEXT: beqz a2, .LBB13_2
+; SFB-NEXT: # %bb.1: # %entry
+; SFB-NEXT: mv a0, a1
+; SFB-NEXT: .LBB13_2: # %entry
+; SFB-NEXT: ret
+entry:
+ %and = and i32 %cond, 1
+ %cmp10 = icmp eq i32 %and, 0
+ %0 = or i32 %B, %A
+ %1 = select i1 %cmp10, i32 %A, i32 %0
+ ret i32 %1
+}
+
+; Equivalent to above, but with icmp ne (and %cond, 1), 1 instead of
+; icmp eq (and %cond, 1), 0
+define i32 @select_or_1b(i32 %A, i32 %B, i32 %cond) {
+; NOSFB-LABEL: select_or_1b:
+; NOSFB: # %bb.0: # %entry
+; NOSFB-NEXT: andi a2, a2, 1
+; NOSFB-NEXT: neg a2, a2
+; NOSFB-NEXT: and a1, a1, a2
+; NOSFB-NEXT: or a0, a0, a1
+; NOSFB-NEXT: ret
+;
+; SFB-LABEL: select_or_1b:
+; SFB: # %bb.0: # %entry
+; SFB-NEXT: andi a2, a2, 1
+; SFB-NEXT: or a1, a1, a0
+; SFB-NEXT: beqz a2, .LBB14_2
+; SFB-NEXT: # %bb.1: # %entry
+; SFB-NEXT: mv a0, a1
+; SFB-NEXT: .LBB14_2: # %entry
+; SFB-NEXT: ret
+entry:
+ %and = and i32 %cond, 1
+ %cmp10 = icmp ne i32 %and, 1
+ %0 = or i32 %B, %A
+ %1 = select i1 %cmp10, i32 %A, i32 %0
+ ret i32 %1
+}
More information about the llvm-commits
mailing list