[llvm] faa326d - [RISCV] Add branch+c.mv macrofusion for sifive-p450. (#76169)

via llvm-commits llvm-commits at lists.llvm.org
Mon Jan 8 15:23:30 PST 2024


Author: Craig Topper
Date: 2024-01-08T15:23:26-08:00
New Revision: faa326de97bf6119dcc42806b07f3523c521ae96

URL: https://github.com/llvm/llvm-project/commit/faa326de97bf6119dcc42806b07f3523c521ae96
DIFF: https://github.com/llvm/llvm-project/commit/faa326de97bf6119dcc42806b07f3523c521ae96.diff

LOG: [RISCV] Add branch+c.mv macrofusion for sifive-p450. (#76169)

sifive-p450 supports a very restricted version of the short forward
branch optimization from the sifive-7-series.

For sifive-p450, a branch over a single c.mv can be macrofused as a
conditional move operation. Due to encoding restrictions on c.mv, we
can't conditionally move from X0. That would require c.li instead.

Added: 
    llvm/test/CodeGen/RISCV/cmov-branch-opt.ll

Modified: 
    llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
    llvm/lib/Target/RISCV/RISCVFeatures.td
    llvm/lib/Target/RISCV/RISCVISelLowering.cpp
    llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
    llvm/lib/Target/RISCV/RISCVInstrInfo.td
    llvm/lib/Target/RISCV/RISCVProcessors.td
    llvm/lib/Target/RISCV/RISCVSubtarget.h

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
index 24a13f93af880e..a39f0671a6dc28 100644
--- a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
@@ -109,6 +109,7 @@ bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB,
     return expandRV32ZdinxStore(MBB, MBBI);
   case RISCV::PseudoRV32ZdinxLD:
     return expandRV32ZdinxLoad(MBB, MBBI);
+  case RISCV::PseudoCCMOVGPRNoX0:
   case RISCV::PseudoCCMOVGPR:
   case RISCV::PseudoCCADD:
   case RISCV::PseudoCCSUB:
@@ -191,7 +192,8 @@ bool RISCVExpandPseudo::expandCCOp(MachineBasicBlock &MBB,
   Register DestReg = MI.getOperand(0).getReg();
   assert(MI.getOperand(4).getReg() == DestReg);
 
-  if (MI.getOpcode() == RISCV::PseudoCCMOVGPR) {
+  if (MI.getOpcode() == RISCV::PseudoCCMOVGPR ||
+      MI.getOpcode() == RISCV::PseudoCCMOVGPRNoX0) {
     // Add MV.
     BuildMI(TrueBB, DL, TII->get(RISCV::ADDI), DestReg)
         .add(MI.getOperand(5))

diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td
index 59b202606dadaf..bb7a3291085d43 100644
--- a/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -1021,6 +1021,12 @@ def TuneShortForwardBranchOpt
 def HasShortForwardBranchOpt : Predicate<"Subtarget->hasShortForwardBranchOpt()">;
 def NoShortForwardBranchOpt : Predicate<"!Subtarget->hasShortForwardBranchOpt()">;
 
+def TuneConditionalCompressedMoveFusion
+    : SubtargetFeature<"conditional-cmv-fusion", "HasConditionalCompressedMoveFusion",
+                       "true", "Enable branch+c.mv fusion">;
+def HasConditionalMoveFusion : Predicate<"Subtarget->hasConditionalMoveFusion()">;
+def NoConditionalMoveFusion  : Predicate<"!Subtarget->hasConditionalMoveFusion()">;
+
 def TuneSiFive7 : SubtargetFeature<"sifive7", "RISCVProcFamily", "SiFive7",
                                    "SiFive 7-Series processors",
                                    [TuneNoDefaultUnroll,

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 79c16cf4c4c361..135b41c7a08502 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -6920,7 +6920,7 @@ static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
   MVT VT = N->getSimpleValueType(0);
   SDLoc DL(N);
 
-  if (!Subtarget.hasShortForwardBranchOpt()) {
+  if (!Subtarget.hasConditionalMoveFusion()) {
     // (select c, -1, y) -> -c | y
     if (isAllOnesConstant(TrueV)) {
       SDValue Neg = DAG.getNegative(CondV, DL, VT);
@@ -7084,7 +7084,7 @@ SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
 
     // (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c))
     // Unless we have the short forward branch optimization.
-    if (!Subtarget.hasShortForwardBranchOpt())
+    if (!Subtarget.hasConditionalMoveFusion())
       return DAG.getNode(
           ISD::OR, DL, VT,
           DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV),
@@ -12209,7 +12209,7 @@ static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
   if (VT.isVector())
     return SDValue();
 
-  if (!Subtarget.hasShortForwardBranchOpt()) {
+  if (!Subtarget.hasConditionalMoveFusion()) {
     // (select cond, x, (and x, c)) has custom lowering with Zicond.
     if ((!Subtarget.hasStdExtZicond() &&
          !Subtarget.hasVendorXVentanaCondOps()) ||
@@ -14440,7 +14440,7 @@ static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
   if (SDValue V = useInversedSetcc(N, DAG, Subtarget))
     return V;
 
-  if (Subtarget.hasShortForwardBranchOpt())
+  if (Subtarget.hasConditionalMoveFusion())
     return SDValue();
 
   SDValue TrueVal = N->getOperand(1);
@@ -15178,7 +15178,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
       return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
                          {LHS, RHS, CC, TrueV, FalseV});
 
-    if (!Subtarget.hasShortForwardBranchOpt()) {
+    if (!Subtarget.hasConditionalMoveFusion()) {
       // (select c, -1, y) -> -c | y
       if (isAllOnesConstant(TrueV)) {
         SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 7f6a045a7d042f..a24e8b2d18cb65 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -2650,6 +2650,7 @@ bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI,
   case RISCV::TH_MULSH:
     // Operands 2 and 3 are commutable.
     return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 2, 3);
+  case RISCV::PseudoCCMOVGPRNoX0:
   case RISCV::PseudoCCMOVGPR:
     // Operands 4 and 5 are commutable.
     return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 4, 5);
@@ -2806,6 +2807,7 @@ MachineInstr *RISCVInstrInfo::commuteInstructionImpl(MachineInstr &MI,
     return TargetInstrInfo::commuteInstructionImpl(WorkingMI, false, OpIdx1,
                                                    OpIdx2);
   }
+  case RISCV::PseudoCCMOVGPRNoX0:
   case RISCV::PseudoCCMOVGPR: {
     // CCMOV can be commuted by inverting the condition.
     auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index 2f4744529469bd..e274e9f3898fb7 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -1371,6 +1371,24 @@ def PseudoCCMOVGPR : Pseudo<(outs GPR:$dst),
                             ReadSFBALU, ReadSFBALU]>;
 }
 
+// This should always expand to a branch+c.mv so the size is 6 or 4 if the
+// branch is compressible.
+let Predicates = [HasConditionalMoveFusion, NoShortForwardBranchOpt],
+    Constraints = "$dst = $falsev", isCommutable = 1, Size = 6 in {
+// This instruction moves $truev to $dst when the condition is true. It will
+// be expanded to control flow in RISCVExpandPseudoInsts.
+// We use GPRNoX0 because c.mv cannot encode X0.
+def PseudoCCMOVGPRNoX0 : Pseudo<(outs GPRNoX0:$dst),
+                                (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+                                 GPRNoX0:$falsev, GPRNoX0:$truev),
+                                [(set GPRNoX0:$dst,
+                                  (riscv_selectcc_frag:$cc (XLenVT GPR:$lhs),
+                                                           (XLenVT GPR:$rhs),
+                                                           cond, (XLenVT GPRNoX0:$truev),
+                                                           (XLenVT GPRNoX0:$falsev)))]>,
+                         Sched<[]>;
+}
+
 // Conditional binops, that updates update $dst to (op rs1, rs2) when condition
 // is true. Returns $falsev otherwise. Selected by optimizeSelect.
 // TODO: Can we use DefaultOperands on the regular binop to accomplish this more
@@ -1519,7 +1537,7 @@ multiclass SelectCC_GPR_rrirr<DAGOperand valty, ValueType vt> {
              (IntCCtoRISCVCC $cc), valty:$truev, valty:$falsev)>;
 }
 
-let Predicates = [NoShortForwardBranchOpt] in
+let Predicates = [NoConditionalMoveFusion] in
 defm Select_GPR : SelectCC_GPR_rrirr<GPR, XLenVT>;
 
 class SelectCompressOpt<CondCode Cond>

diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td
index ba8996e710edc0..52800f086129a0 100644
--- a/llvm/lib/Target/RISCV/RISCVProcessors.td
+++ b/llvm/lib/Target/RISCV/RISCVProcessors.td
@@ -232,7 +232,8 @@ def SIFIVE_P450 : RISCVProcessorModel<"sifive-p450", NoSchedModel,
                                        FeatureStdExtZba,
                                        FeatureStdExtZbb,
                                        FeatureStdExtZbs,
-                                       FeatureStdExtZfhmin]>;
+                                       FeatureStdExtZfhmin],
+                                      [TuneConditionalCompressedMoveFusion]>;
 
 def SYNTACORE_SCR1_BASE : RISCVProcessorModel<"syntacore-scr1-base",
                                               SyntacoreSCR1Model,

diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
index 26320b05d9be29..2ba93764facd07 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -150,6 +150,13 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
   bool hasHalfFPLoadStoreMove() const {
     return HasStdExtZfhmin || HasStdExtZfbfmin;
   }
+
+  bool hasConditionalMoveFusion() const {
+    // Do we support fusing a branch+mv or branch+c.mv as a conditional move.
+    return (hasConditionalCompressedMoveFusion() && hasStdExtCOrZca()) ||
+           hasShortForwardBranchOpt();
+  }
+
   bool is64Bit() const { return IsRV64; }
   MVT getXLenVT() const {
     return is64Bit() ? MVT::i64 : MVT::i32;

diff --git a/llvm/test/CodeGen/RISCV/cmov-branch-opt.ll b/llvm/test/CodeGen/RISCV/cmov-branch-opt.ll
new file mode 100644
index 00000000000000..6ad529ea477c1a
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/cmov-branch-opt.ll
@@ -0,0 +1,461 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+c -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefix=NOCMOV %s
+; RUN: llc -mtriple=riscv64 -mattr=+conditional-cmv-fusion,+c -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefixes=CMOV,CMOV-NOZICOND %s
+; RUN: llc -mtriple=riscv64 -mattr=+conditional-cmv-fusion,+c,+experimental-zicond -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefixes=CMOV,CMOV-ZICOND %s
+; RUN: llc -mtriple=riscv64 -mattr=+short-forward-branch-opt -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefixes=SHORT_FORWARD,SFB-NOZICOND %s
+; RUN: llc -mtriple=riscv64 -mattr=+short-forward-branch-opt,+c -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefixes=SHORT_FORWARD,SFB-NOZICOND %s
+; RUN: llc -mtriple=riscv64 -mattr=+short-forward-branch-opt,+experimental-zicond -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefixes=SHORT_FORWARD,SFB-ZICOND %s
+
+; The conditional move optimization in sifive-p450 requires that only a
+; single c.mv instruction appears in the branch shadow.
+
+; The sifive-7-series can predicate an xor.
+
+define signext i32 @test1(i32 signext %x, i32 signext %y, i32 signext %z) {
+; NOCMOV-LABEL: test1:
+; NOCMOV:       # %bb.0:
+; NOCMOV-NEXT:    snez a2, a2
+; NOCMOV-NEXT:    addi a2, a2, -1
+; NOCMOV-NEXT:    and a1, a1, a2
+; NOCMOV-NEXT:    xor a0, a0, a1
+; NOCMOV-NEXT:    ret
+;
+; CMOV-LABEL: test1:
+; CMOV:       # %bb.0:
+; CMOV-NEXT:    xor a1, a1, a0
+; CMOV-NEXT:    bnez a2, .LBB0_2
+; CMOV-NEXT:  # %bb.1:
+; CMOV-NEXT:    mv a0, a1
+; CMOV-NEXT:  .LBB0_2:
+; CMOV-NEXT:    ret
+;
+; SHORT_FORWARD-LABEL: test1:
+; SHORT_FORWARD:       # %bb.0:
+; SHORT_FORWARD-NEXT:    bnez a2, .LBB0_2
+; SHORT_FORWARD-NEXT:  # %bb.1:
+; SHORT_FORWARD-NEXT:    xor a0, a0, a1
+; SHORT_FORWARD-NEXT:  .LBB0_2:
+; SHORT_FORWARD-NEXT:    ret
+  %c = icmp eq i32 %z, 0
+  %a = xor i32 %x, %y
+  %b = select i1 %c, i32 %a, i32 %x
+  ret i32 %b
+}
+
+define signext i32 @test2(i32 signext %x, i32 signext %y, i32 signext %z) {
+; NOCMOV-LABEL: test2:
+; NOCMOV:       # %bb.0:
+; NOCMOV-NEXT:    seqz a2, a2
+; NOCMOV-NEXT:    addi a2, a2, -1
+; NOCMOV-NEXT:    and a1, a1, a2
+; NOCMOV-NEXT:    xor a0, a0, a1
+; NOCMOV-NEXT:    ret
+;
+; CMOV-LABEL: test2:
+; CMOV:       # %bb.0:
+; CMOV-NEXT:    xor a1, a1, a0
+; CMOV-NEXT:    beqz a2, .LBB1_2
+; CMOV-NEXT:  # %bb.1:
+; CMOV-NEXT:    mv a0, a1
+; CMOV-NEXT:  .LBB1_2:
+; CMOV-NEXT:    ret
+;
+; SHORT_FORWARD-LABEL: test2:
+; SHORT_FORWARD:       # %bb.0:
+; SHORT_FORWARD-NEXT:    beqz a2, .LBB1_2
+; SHORT_FORWARD-NEXT:  # %bb.1:
+; SHORT_FORWARD-NEXT:    xor a0, a0, a1
+; SHORT_FORWARD-NEXT:  .LBB1_2:
+; SHORT_FORWARD-NEXT:    ret
+  %c = icmp eq i32 %z, 0
+  %a = xor i32 %x, %y
+  %b = select i1 %c, i32 %x, i32 %a
+  ret i32 %b
+}
+
+; Make sure we don't share the same basic block for two selects with the same
+; condition.
+define signext i32 @test3(i32 signext %v, i32 signext %w, i32 signext %x, i32 signext %y, i32 signext %z) {
+; NOCMOV-LABEL: test3:
+; NOCMOV:       # %bb.0:
+; NOCMOV-NEXT:    seqz a4, a4
+; NOCMOV-NEXT:    addi a4, a4, -1
+; NOCMOV-NEXT:    and a1, a1, a4
+; NOCMOV-NEXT:    xor a0, a0, a1
+; NOCMOV-NEXT:    and a3, a3, a4
+; NOCMOV-NEXT:    xor a2, a2, a3
+; NOCMOV-NEXT:    addw a0, a0, a2
+; NOCMOV-NEXT:    ret
+;
+; CMOV-LABEL: test3:
+; CMOV:       # %bb.0:
+; CMOV-NEXT:    xor a1, a1, a0
+; CMOV-NEXT:    bnez a4, .LBB2_2
+; CMOV-NEXT:  # %bb.1:
+; CMOV-NEXT:    mv a1, a0
+; CMOV-NEXT:  .LBB2_2:
+; CMOV-NEXT:    xor a0, a2, a3
+; CMOV-NEXT:    bnez a4, .LBB2_4
+; CMOV-NEXT:  # %bb.3:
+; CMOV-NEXT:    mv a0, a2
+; CMOV-NEXT:  .LBB2_4:
+; CMOV-NEXT:    addw a0, a0, a1
+; CMOV-NEXT:    ret
+;
+; SHORT_FORWARD-LABEL: test3:
+; SHORT_FORWARD:       # %bb.0:
+; SHORT_FORWARD-NEXT:    beqz a4, .LBB2_2
+; SHORT_FORWARD-NEXT:  # %bb.1:
+; SHORT_FORWARD-NEXT:    xor a0, a0, a1
+; SHORT_FORWARD-NEXT:  .LBB2_2:
+; SHORT_FORWARD-NEXT:    beqz a4, .LBB2_4
+; SHORT_FORWARD-NEXT:  # %bb.3:
+; SHORT_FORWARD-NEXT:    xor a2, a2, a3
+; SHORT_FORWARD-NEXT:  .LBB2_4:
+; SHORT_FORWARD-NEXT:    addw a0, a0, a2
+; SHORT_FORWARD-NEXT:    ret
+  %c = icmp eq i32 %z, 0
+  %a = xor i32 %v, %w
+  %b = select i1 %c, i32 %v, i32 %a
+  %d = xor i32 %x, %y
+  %e = select i1 %c, i32 %x, i32 %d
+  %f = add i32 %b, %e
+  ret i32 %f
+}
+
+define signext i32 @test4(i32 signext %x, i32 signext %y, i32 signext %z) {
+; NOCMOV-LABEL: test4:
+; NOCMOV:       # %bb.0:
+; NOCMOV-NEXT:    snez a0, a2
+; NOCMOV-NEXT:    addi a0, a0, -1
+; NOCMOV-NEXT:    andi a0, a0, 3
+; NOCMOV-NEXT:    ret
+;
+; CMOV-NOZICOND-LABEL: test4:
+; CMOV-NOZICOND:       # %bb.0:
+; CMOV-NOZICOND-NEXT:    li a1, 0
+; CMOV-NOZICOND-NEXT:    li a0, 3
+; CMOV-NOZICOND-NEXT:    beqz a2, .LBB3_2
+; CMOV-NOZICOND-NEXT:  # %bb.1:
+; CMOV-NOZICOND-NEXT:    mv a0, a1
+; CMOV-NOZICOND-NEXT:  .LBB3_2:
+; CMOV-NOZICOND-NEXT:    ret
+;
+; CMOV-ZICOND-LABEL: test4:
+; CMOV-ZICOND:       # %bb.0:
+; CMOV-ZICOND-NEXT:    li a0, 3
+; CMOV-ZICOND-NEXT:    czero.nez a0, a0, a2
+; CMOV-ZICOND-NEXT:    ret
+;
+; SFB-NOZICOND-LABEL: test4:
+; SFB-NOZICOND:       # %bb.0:
+; SFB-NOZICOND-NEXT:    li a0, 3
+; SFB-NOZICOND-NEXT:    beqz a2, .LBB3_2
+; SFB-NOZICOND-NEXT:  # %bb.1:
+; SFB-NOZICOND-NEXT:    li a0, 0
+; SFB-NOZICOND-NEXT:  .LBB3_2:
+; SFB-NOZICOND-NEXT:    ret
+;
+; SFB-ZICOND-LABEL: test4:
+; SFB-ZICOND:       # %bb.0:
+; SFB-ZICOND-NEXT:    li a0, 3
+; SFB-ZICOND-NEXT:    czero.nez a0, a0, a2
+; SFB-ZICOND-NEXT:    ret
+  %c = icmp eq i32 %z, 0
+  %a = select i1 %c, i32 3, i32 0
+  ret i32 %a
+}
+
+define i16 @select_xor_1(i16 %A, i8 %cond) {
+; NOCMOV-LABEL: select_xor_1:
+; NOCMOV:       # %bb.0: # %entry
+; NOCMOV-NEXT:    slli a1, a1, 63
+; NOCMOV-NEXT:    srai a1, a1, 63
+; NOCMOV-NEXT:    andi a1, a1, 43
+; NOCMOV-NEXT:    xor a0, a0, a1
+; NOCMOV-NEXT:    ret
+;
+; CMOV-LABEL: select_xor_1:
+; CMOV:       # %bb.0: # %entry
+; CMOV-NEXT:    andi a1, a1, 1
+; CMOV-NEXT:    xori a2, a0, 43
+; CMOV-NEXT:    beqz a1, .LBB4_2
+; CMOV-NEXT:  # %bb.1: # %entry
+; CMOV-NEXT:    mv a0, a2
+; CMOV-NEXT:  .LBB4_2: # %entry
+; CMOV-NEXT:    ret
+;
+; SHORT_FORWARD-LABEL: select_xor_1:
+; SHORT_FORWARD:       # %bb.0: # %entry
+; SHORT_FORWARD-NEXT:    andi a1, a1, 1
+; SHORT_FORWARD-NEXT:    beqz a1, .LBB4_2
+; SHORT_FORWARD-NEXT:  # %bb.1: # %entry
+; SHORT_FORWARD-NEXT:    xori a0, a0, 43
+; SHORT_FORWARD-NEXT:  .LBB4_2: # %entry
+; SHORT_FORWARD-NEXT:    ret
+entry:
+ %and = and i8 %cond, 1
+ %cmp10 = icmp eq i8 %and, 0
+ %0 = xor i16 %A, 43
+ %1 = select i1 %cmp10, i16 %A, i16 %0
+ ret i16 %1
+}
+
+; Equivalent to above, but with icmp ne (and %cond, 1), 1 instead of
+; icmp eq (and %cond, 1), 0
+define i16 @select_xor_1b(i16 %A, i8 %cond) {
+; NOCMOV-LABEL: select_xor_1b:
+; NOCMOV:       # %bb.0: # %entry
+; NOCMOV-NEXT:    slli a1, a1, 63
+; NOCMOV-NEXT:    srai a1, a1, 63
+; NOCMOV-NEXT:    andi a1, a1, 43
+; NOCMOV-NEXT:    xor a0, a0, a1
+; NOCMOV-NEXT:    ret
+;
+; CMOV-LABEL: select_xor_1b:
+; CMOV:       # %bb.0: # %entry
+; CMOV-NEXT:    andi a1, a1, 1
+; CMOV-NEXT:    xori a2, a0, 43
+; CMOV-NEXT:    beqz a1, .LBB5_2
+; CMOV-NEXT:  # %bb.1: # %entry
+; CMOV-NEXT:    mv a0, a2
+; CMOV-NEXT:  .LBB5_2: # %entry
+; CMOV-NEXT:    ret
+;
+; SHORT_FORWARD-LABEL: select_xor_1b:
+; SHORT_FORWARD:       # %bb.0: # %entry
+; SHORT_FORWARD-NEXT:    andi a1, a1, 1
+; SHORT_FORWARD-NEXT:    beqz a1, .LBB5_2
+; SHORT_FORWARD-NEXT:  # %bb.1: # %entry
+; SHORT_FORWARD-NEXT:    xori a0, a0, 43
+; SHORT_FORWARD-NEXT:  .LBB5_2: # %entry
+; SHORT_FORWARD-NEXT:    ret
+entry:
+ %and = and i8 %cond, 1
+ %cmp10 = icmp ne i8 %and, 1
+ %0 = xor i16 %A, 43
+ %1 = select i1 %cmp10, i16 %A, i16 %0
+ ret i16 %1
+}
+
+define i32 @select_xor_2(i32 %A, i32 %B, i8 %cond) {
+; NOCMOV-LABEL: select_xor_2:
+; NOCMOV:       # %bb.0: # %entry
+; NOCMOV-NEXT:    slli a2, a2, 63
+; NOCMOV-NEXT:    srai a2, a2, 63
+; NOCMOV-NEXT:    and a1, a1, a2
+; NOCMOV-NEXT:    xor a0, a0, a1
+; NOCMOV-NEXT:    ret
+;
+; CMOV-LABEL: select_xor_2:
+; CMOV:       # %bb.0: # %entry
+; CMOV-NEXT:    andi a2, a2, 1
+; CMOV-NEXT:    xor a1, a1, a0
+; CMOV-NEXT:    beqz a2, .LBB6_2
+; CMOV-NEXT:  # %bb.1: # %entry
+; CMOV-NEXT:    mv a0, a1
+; CMOV-NEXT:  .LBB6_2: # %entry
+; CMOV-NEXT:    ret
+;
+; SFB-ZICOND-LABEL: select_xor_2:
+; SFB-ZICOND:       # %bb.0: # %entry
+; SFB-ZICOND-NEXT:    andi a2, a2, 1
+; SFB-ZICOND-NEXT:    beqz a2, .LBB6_2
+; SFB-ZICOND-NEXT:  # %bb.1: # %entry
+; SFB-ZICOND-NEXT:    xor a0, a1, a0
+; SFB-ZICOND-NEXT:  .LBB6_2: # %entry
+; SFB-ZICOND-NEXT:    ret
+entry:
+ %and = and i8 %cond, 1
+ %cmp10 = icmp eq i8 %and, 0
+ %0 = xor i32 %B, %A
+ %1 = select i1 %cmp10, i32 %A, i32 %0
+ ret i32 %1
+}
+
+; Equivalent to above, but with icmp ne (and %cond, 1), 1 instead of
+; icmp eq (and %cond, 1), 0
+define i32 @select_xor_2b(i32 %A, i32 %B, i8 %cond) {
+; NOCMOV-LABEL: select_xor_2b:
+; NOCMOV:       # %bb.0: # %entry
+; NOCMOV-NEXT:    slli a2, a2, 63
+; NOCMOV-NEXT:    srai a2, a2, 63
+; NOCMOV-NEXT:    and a1, a1, a2
+; NOCMOV-NEXT:    xor a0, a0, a1
+; NOCMOV-NEXT:    ret
+;
+; CMOV-LABEL: select_xor_2b:
+; CMOV:       # %bb.0: # %entry
+; CMOV-NEXT:    andi a2, a2, 1
+; CMOV-NEXT:    xor a1, a1, a0
+; CMOV-NEXT:    beqz a2, .LBB7_2
+; CMOV-NEXT:  # %bb.1: # %entry
+; CMOV-NEXT:    mv a0, a1
+; CMOV-NEXT:  .LBB7_2: # %entry
+; CMOV-NEXT:    ret
+;
+; SFB-ZICOND-LABEL: select_xor_2b:
+; SFB-ZICOND:       # %bb.0: # %entry
+; SFB-ZICOND-NEXT:    andi a2, a2, 1
+; SFB-ZICOND-NEXT:    beqz a2, .LBB7_2
+; SFB-ZICOND-NEXT:  # %bb.1: # %entry
+; SFB-ZICOND-NEXT:    xor a0, a1, a0
+; SFB-ZICOND-NEXT:  .LBB7_2: # %entry
+; SFB-ZICOND-NEXT:    ret
+entry:
+ %and = and i8 %cond, 1
+ %cmp10 = icmp ne i8 %and, 1
+ %0 = xor i32 %B, %A
+ %1 = select i1 %cmp10, i32 %A, i32 %0
+ ret i32 %1
+}
+
+define i32 @select_or(i32 %A, i32 %B, i8 %cond) {
+; NOCMOV-LABEL: select_or:
+; NOCMOV:       # %bb.0: # %entry
+; NOCMOV-NEXT:    slli a2, a2, 63
+; NOCMOV-NEXT:    srai a2, a2, 63
+; NOCMOV-NEXT:    and a1, a1, a2
+; NOCMOV-NEXT:    or a0, a0, a1
+; NOCMOV-NEXT:    ret
+;
+; CMOV-LABEL: select_or:
+; CMOV:       # %bb.0: # %entry
+; CMOV-NEXT:    andi a2, a2, 1
+; CMOV-NEXT:    or a1, a1, a0
+; CMOV-NEXT:    beqz a2, .LBB8_2
+; CMOV-NEXT:  # %bb.1: # %entry
+; CMOV-NEXT:    mv a0, a1
+; CMOV-NEXT:  .LBB8_2: # %entry
+; CMOV-NEXT:    ret
+;
+; SFB-ZICOND-LABEL: select_or:
+; SFB-ZICOND:       # %bb.0: # %entry
+; SFB-ZICOND-NEXT:    andi a2, a2, 1
+; SFB-ZICOND-NEXT:    beqz a2, .LBB8_2
+; SFB-ZICOND-NEXT:  # %bb.1: # %entry
+; SFB-ZICOND-NEXT:    or a0, a1, a0
+; SFB-ZICOND-NEXT:  .LBB8_2: # %entry
+; SFB-ZICOND-NEXT:    ret
+entry:
+ %and = and i8 %cond, 1
+ %cmp10 = icmp eq i8 %and, 0
+ %0 = or i32 %B, %A
+ %1 = select i1 %cmp10, i32 %A, i32 %0
+ ret i32 %1
+}
+
+; Equivalent to above, but with icmp ne (and %cond, 1), 1 instead of
+; icmp eq (and %cond, 1), 0
+define i32 @select_or_b(i32 %A, i32 %B, i8 %cond) {
+; NOCMOV-LABEL: select_or_b:
+; NOCMOV:       # %bb.0: # %entry
+; NOCMOV-NEXT:    slli a2, a2, 63
+; NOCMOV-NEXT:    srai a2, a2, 63
+; NOCMOV-NEXT:    and a1, a1, a2
+; NOCMOV-NEXT:    or a0, a0, a1
+; NOCMOV-NEXT:    ret
+;
+; CMOV-LABEL: select_or_b:
+; CMOV:       # %bb.0: # %entry
+; CMOV-NEXT:    andi a2, a2, 1
+; CMOV-NEXT:    or a1, a1, a0
+; CMOV-NEXT:    beqz a2, .LBB9_2
+; CMOV-NEXT:  # %bb.1: # %entry
+; CMOV-NEXT:    mv a0, a1
+; CMOV-NEXT:  .LBB9_2: # %entry
+; CMOV-NEXT:    ret
+;
+; SFB-ZICOND-LABEL: select_or_b:
+; SFB-ZICOND:       # %bb.0: # %entry
+; SFB-ZICOND-NEXT:    andi a2, a2, 1
+; SFB-ZICOND-NEXT:    beqz a2, .LBB9_2
+; SFB-ZICOND-NEXT:  # %bb.1: # %entry
+; SFB-ZICOND-NEXT:    or a0, a1, a0
+; SFB-ZICOND-NEXT:  .LBB9_2: # %entry
+; SFB-ZICOND-NEXT:    ret
+entry:
+ %and = and i8 %cond, 1
+ %cmp10 = icmp ne i8 %and, 1
+ %0 = or i32 %B, %A
+ %1 = select i1 %cmp10, i32 %A, i32 %0
+ ret i32 %1
+}
+
+define i32 @select_or_1(i32 %A, i32 %B, i32 %cond) {
+; NOCMOV-LABEL: select_or_1:
+; NOCMOV:       # %bb.0: # %entry
+; NOCMOV-NEXT:    slli a2, a2, 63
+; NOCMOV-NEXT:    srai a2, a2, 63
+; NOCMOV-NEXT:    and a1, a1, a2
+; NOCMOV-NEXT:    or a0, a0, a1
+; NOCMOV-NEXT:    ret
+;
+; CMOV-LABEL: select_or_1:
+; CMOV:       # %bb.0: # %entry
+; CMOV-NEXT:    andi a2, a2, 1
+; CMOV-NEXT:    or a1, a1, a0
+; CMOV-NEXT:    beqz a2, .LBB10_2
+; CMOV-NEXT:  # %bb.1: # %entry
+; CMOV-NEXT:    mv a0, a1
+; CMOV-NEXT:  .LBB10_2: # %entry
+; CMOV-NEXT:    ret
+;
+; SFB-ZICOND-LABEL: select_or_1:
+; SFB-ZICOND:       # %bb.0: # %entry
+; SFB-ZICOND-NEXT:    andi a2, a2, 1
+; SFB-ZICOND-NEXT:    beqz a2, .LBB10_2
+; SFB-ZICOND-NEXT:  # %bb.1: # %entry
+; SFB-ZICOND-NEXT:    or a0, a1, a0
+; SFB-ZICOND-NEXT:  .LBB10_2: # %entry
+; SFB-ZICOND-NEXT:    ret
+entry:
+ %and = and i32 %cond, 1
+ %cmp10 = icmp eq i32 %and, 0
+ %0 = or i32 %B, %A
+ %1 = select i1 %cmp10, i32 %A, i32 %0
+ ret i32 %1
+}
+
+; Equivalent to above, but with icmp ne (and %cond, 1), 1 instead of
+; icmp eq (and %cond, 1), 0
+define i32 @select_or_1b(i32 %A, i32 %B, i32 %cond) {
+; NOCMOV-LABEL: select_or_1b:
+; NOCMOV:       # %bb.0: # %entry
+; NOCMOV-NEXT:    slli a2, a2, 63
+; NOCMOV-NEXT:    srai a2, a2, 63
+; NOCMOV-NEXT:    and a1, a1, a2
+; NOCMOV-NEXT:    or a0, a0, a1
+; NOCMOV-NEXT:    ret
+;
+; CMOV-LABEL: select_or_1b:
+; CMOV:       # %bb.0: # %entry
+; CMOV-NEXT:    andi a2, a2, 1
+; CMOV-NEXT:    or a1, a1, a0
+; CMOV-NEXT:    beqz a2, .LBB11_2
+; CMOV-NEXT:  # %bb.1: # %entry
+; CMOV-NEXT:    mv a0, a1
+; CMOV-NEXT:  .LBB11_2: # %entry
+; CMOV-NEXT:    ret
+;
+; SFB-ZICOND-LABEL: select_or_1b:
+; SFB-ZICOND:       # %bb.0: # %entry
+; SFB-ZICOND-NEXT:    andi a2, a2, 1
+; SFB-ZICOND-NEXT:    beqz a2, .LBB11_2
+; SFB-ZICOND-NEXT:  # %bb.1: # %entry
+; SFB-ZICOND-NEXT:    or a0, a1, a0
+; SFB-ZICOND-NEXT:  .LBB11_2: # %entry
+; SFB-ZICOND-NEXT:    ret
+entry:
+ %and = and i32 %cond, 1
+ %cmp10 = icmp ne i32 %and, 1
+ %0 = or i32 %B, %A
+ %1 = select i1 %cmp10, i32 %A, i32 %0
+ ret i32 %1
+}


        


More information about the llvm-commits mailing list