[llvm] b661470 - Revert "Revert "[AArch64][GlobalISel] Optimize conjunctions of compares to conditional compares.""

Tue Feb 22 17:22:17 PST 2022

Author: Amara Emerson
Date: 2022-02-22T17:22:11-08:00
New Revision: b661470bce1454f7e08c7efe932067a25737db7f

URL: https://github.com/llvm/llvm-project/commit/b661470bce1454f7e08c7efe932067a25737db7f
DIFF: https://github.com/llvm/llvm-project/commit/b661470bce1454f7e08c7efe932067a25737db7f.diff

LOG: Revert "Revert "[AArch64][GlobalISel] Optimize conjunctions of compares to conditional compares.""

This reverts commit 55c181a6c786cfbfa8b7aabe0a8ba721a65b1445.

The original commit I made was an old patch, mea culpa. Committing the right
implementation with test case for the reported crash.

Added: 
    

Modified: 
    llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
    llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
    llvm/test/CodeGen/AArch64/arm64-ccmp.ll

Removed: 
    


################################################################################
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
index 7103656365b1b..58fe48200e732 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
@@ -14,6 +14,7 @@
 #ifndef LLVM_CODEGEN_GLOBALISEL_GENERICMACHINEINSTRS_H
 #define LLVM_CODEGEN_GLOBALISEL_GENERICMACHINEINSTRS_H
 
+#include "llvm/IR/Instructions.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/TargetOpcodes.h"
@@ -226,6 +227,37 @@ class GSelect : public GenericMachineInstr {
   }
 };
 
+/// Represents a G_ICMP or G_FCMP, with accessors shared by both compares.
+class GAnyCmp : public GenericMachineInstr {
+public:
+  CmpInst::Predicate getCond() const { // Predicate is stored in operand 1.
+    return static_cast<CmpInst::Predicate>(getOperand(1).getPredicate());
+  }
+  Register getLHSReg() const { return getReg(2); } // Left operand register.
+  Register getRHSReg() const { return getReg(3); } // Right operand register.
+
+  static bool classof(const MachineInstr *MI) { // True for either compare.
+    return MI->getOpcode() == TargetOpcode::G_ICMP ||
+           MI->getOpcode() == TargetOpcode::G_FCMP;
+  }
+};
+
+/// Represents a G_ICMP (the integer-compare form of GAnyCmp).
+class GICmp : public GAnyCmp {
+public:
+  static bool classof(const MachineInstr *MI) { // Matches G_ICMP only.
+    return MI->getOpcode() == TargetOpcode::G_ICMP;
+  }
+};
+
+/// Represents a G_FCMP (the floating-point-compare form of GAnyCmp).
+class GFCmp : public GAnyCmp {
+public:
+  static bool classof(const MachineInstr *MI) { // Matches G_FCMP only.
+    return MI->getOpcode() == TargetOpcode::G_FCMP;
+  }
+};
+
 } // namespace llvm
 
 #endif // LLVM_CODEGEN_GLOBALISEL_GENERICMACHINEINSTRS_H

diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 8a79d2426c8f0..5426844e59ca1 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -27,6 +27,7 @@
 #include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -63,6 +64,7 @@ namespace {
 #include "AArch64GenGlobalISel.inc"
 #undef GET_GLOBALISEL_PREDICATE_BITSET
 
+
 class AArch64InstructionSelector : public InstructionSelector {
 public:
   AArch64InstructionSelector(const AArch64TargetMachine &TM,
@@ -294,6 +296,20 @@ class AArch64InstructionSelector : public InstructionSelector {
   emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
                  MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;
 
+  /// Emit expression as a conjunction (a series of CCMP/CFCMP ops).
+  /// In some cases this is even possible with OR operations in the expression.
+  MachineInstr *emitConjunction(Register Val, AArch64CC::CondCode &OutCC,
+                                MachineIRBuilder &MIB) const;
+  MachineInstr *emitConditionalComparison(Register LHS, Register RHS,
+                                          CmpInst::Predicate CC,
+                                          AArch64CC::CondCode Predicate,
+                                          AArch64CC::CondCode OutCC,
+                                          MachineIRBuilder &MIB) const;
+  MachineInstr *emitConjunctionRec(Register Val, AArch64CC::CondCode &OutCC,
+                                   bool Negate, Register CCOp,
+                                   AArch64CC::CondCode Predicate,
+                                   MachineIRBuilder &MIB) const;
+
   /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
   /// \p IsNegative is true if the test should be "not zero".
   /// This will also optimize the test bit instruction when possible.
@@ -425,7 +441,8 @@ class AArch64InstructionSelector : public InstructionSelector {
   void materializeLargeCMVal(MachineInstr &I, const Value *V, unsigned OpFlags);
 
   // Optimization methods.
-  bool tryOptSelect(MachineInstr &MI);
+  bool tryOptSelect(GSelect &Sel);
+  bool tryOptSelectConjunction(GSelect &Sel, MachineInstr &CondMI);
   MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
                                       MachineOperand &Predicate,
                                       MachineIRBuilder &MIRBuilder) const;
@@ -1310,6 +1327,90 @@ static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
   }
 }
 
+/// Convert an IR fp condition code to one or two AArch64 CCs to be OR'ed.
+static void changeFPCCToORAArch64CC(CmpInst::Predicate CC,
+                                    AArch64CC::CondCode &CondCode,
+                                    AArch64CC::CondCode &CondCode2) {
+  CondCode2 = AArch64CC::AL; // AL marks "no second condition needed".
+  switch (CC) {
+  default:
+    llvm_unreachable("Unknown FP condition!");
+  case CmpInst::FCMP_OEQ:
+    CondCode = AArch64CC::EQ;
+    break;
+  case CmpInst::FCMP_OGT:
+    CondCode = AArch64CC::GT;
+    break;
+  case CmpInst::FCMP_OGE:
+    CondCode = AArch64CC::GE;
+    break;
+  case CmpInst::FCMP_OLT:
+    CondCode = AArch64CC::MI;
+    break;
+  case CmpInst::FCMP_OLE:
+    CondCode = AArch64CC::LS;
+    break;
+  case CmpInst::FCMP_ONE: // Two codes: MI || GT.
+    CondCode = AArch64CC::MI;
+    CondCode2 = AArch64CC::GT;
+    break;
+  case CmpInst::FCMP_ORD:
+    CondCode = AArch64CC::VC;
+    break;
+  case CmpInst::FCMP_UNO:
+    CondCode = AArch64CC::VS;
+    break;
+  case CmpInst::FCMP_UEQ: // Two codes: EQ || VS.
+    CondCode = AArch64CC::EQ;
+    CondCode2 = AArch64CC::VS;
+    break;
+  case CmpInst::FCMP_UGT:
+    CondCode = AArch64CC::HI;
+    break;
+  case CmpInst::FCMP_UGE:
+    CondCode = AArch64CC::PL;
+    break;
+  case CmpInst::FCMP_ULT:
+    CondCode = AArch64CC::LT;
+    break;
+  case CmpInst::FCMP_ULE:
+    CondCode = AArch64CC::LE;
+    break;
+  case CmpInst::FCMP_UNE:
+    CondCode = AArch64CC::NE;
+    break;
+  }
+}
+
+/// Convert an IR fp condition code to an AArch64 CC.
+/// Unlike changeFPCCToORAArch64CC, the cond codes returned here should be
+/// AND'ed instead of OR'ed.
+static void changeFPCCToANDAArch64CC(CmpInst::Predicate CC,
+                                     AArch64CC::CondCode &CondCode,
+                                     AArch64CC::CondCode &CondCode2) {
+  CondCode2 = AArch64CC::AL;
+  switch (CC) {
+  default: // Everything except ONE/UEQ maps to a single code.
+    changeFPCCToORAArch64CC(CC, CondCode, CondCode2);
+    assert(CondCode2 == AArch64CC::AL);
+    break;
+  case CmpInst::FCMP_ONE:
+    // (a one b)
+    // == ((a olt b) || (a ogt b))
+    // == ((a ord b) && (a une b))
+    CondCode = AArch64CC::VC;
+    CondCode2 = AArch64CC::NE;
+    break;
+  case CmpInst::FCMP_UEQ:
+    // (a ueq b)
+    // == ((a uno b) || (a oeq b))
+    // == ((a ule b) && (a uge b))
+    CondCode = AArch64CC::PL;
+    CondCode2 = AArch64CC::LE;
+    break;
+  }
+}
+
 /// Return a register which can be used as a bit to test in a TB(N)Z.
 static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
                               MachineRegisterInfo &MRI) {
@@ -3292,17 +3393,18 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
     return selectCopy(I, TII, MRI, TRI, RBI);
 
   case TargetOpcode::G_SELECT: {
-    if (MRI.getType(I.getOperand(1).getReg()) != LLT::scalar(1)) {
+    auto &Sel = cast<GSelect>(I);
+    if (MRI.getType(Sel.getCondReg()) != LLT::scalar(1)) {
       LLVM_DEBUG(dbgs() << "G_SELECT cond has type: " << Ty
                         << ", expected: " << LLT::scalar(1) << '\n');
       return false;
     }
 
-    const Register CondReg = I.getOperand(1).getReg();
-    const Register TReg = I.getOperand(2).getReg();
-    const Register FReg = I.getOperand(3).getReg();
+    const Register CondReg = Sel.getCondReg();
+    const Register TReg = Sel.getTrueReg();
+    const Register FReg = Sel.getFalseReg();
 
-    if (tryOptSelect(I))
+    if (tryOptSelect(Sel))
       return true;
 
     // Make sure to use an unused vreg instead of wzr, so that the peephole
@@ -3311,9 +3413,9 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
     auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
                      .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
     constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
-    if (!emitSelect(I.getOperand(0).getReg(), TReg, FReg, AArch64CC::NE, MIB))
+    if (!emitSelect(Sel.getReg(0), TReg, FReg, AArch64CC::NE, MIB))
       return false;
-    I.eraseFromParent();
+    Sel.eraseFromParent();
     return true;
   }
   case TargetOpcode::G_ICMP: {
@@ -4702,7 +4804,256 @@ AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
   }
 }
 
-bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) {
+/// Returns true if \p Val is a tree of AND/OR/CMP operations that can be
+/// expressed as a conjunction.
+/// \param CanNegate    Set to true if we can negate the whole sub-tree just by
+///                     changing the conditions on the CMP tests.
+///                     (this means we can call emitConjunctionRec() with
+///                      Negate==true on this sub-tree)
+/// \param MustBeFirst  Set to true if this subtree needs to be negated and we
+///                     cannot do the negation naturally. We are required to
+///                     emit the subtree first in this case.
+/// \param WillNegate   Is true if we are called when the result of this
+///                     subexpression must be negated. This happens when the
+///                     outer expression is an OR. We can use this fact to know
+///                     that we have a double negation (or (or ...) ...) that
+///                     can be implemented for free.
+static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst,
+                               bool WillNegate, MachineRegisterInfo &MRI,
+                               unsigned Depth = 0) {
+  if (!MRI.hasOneNonDBGUse(Val))
+    return false;
+  MachineInstr *ValDef = MRI.getVRegDef(Val);
+  unsigned Opcode = ValDef->getOpcode();
+  if (Opcode == TargetOpcode::G_TRUNC) {
+    // Look through a trunc; only the trunc result's use count was checked.
+    Val = ValDef->getOperand(1).getReg();
+    ValDef = MRI.getVRegDef(Val);
+    Opcode = ValDef->getOpcode();
+  }
+  if (isa<GAnyCmp>(ValDef)) {
+    CanNegate = true;
+    MustBeFirst = false;
+    return true;
+  }
+  // Protect against exponential runtime and stack overflow.
+  if (Depth > 6)
+    return false;
+  if (Opcode == TargetOpcode::G_AND || Opcode == TargetOpcode::G_OR) {
+    bool IsOR = Opcode == TargetOpcode::G_OR;
+    Register O0 = ValDef->getOperand(1).getReg();
+    Register O1 = ValDef->getOperand(2).getReg();
+    bool CanNegateL;
+    bool MustBeFirstL;
+    if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, MRI, Depth + 1))
+      return false;
+    bool CanNegateR;
+    bool MustBeFirstR;
+    if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, MRI, Depth + 1))
+      return false;
+
+    if (MustBeFirstL && MustBeFirstR)
+      return false;
+
+    if (IsOR) {
+      // For an OR expression we need to be able to naturally negate at least
+      // one side or we cannot do the transformation at all.
+      if (!CanNegateL && !CanNegateR)
+        return false;
+      // If the result of the OR will be negated and we can naturally negate
+      // the leaves, then this sub-tree as a whole negates naturally.
+      CanNegate = WillNegate && CanNegateL && CanNegateR;
+      // If we cannot naturally negate the whole sub-tree, then this must be
+      // emitted first.
+      MustBeFirst = !CanNegate;
+    } else {
+      assert(Opcode == TargetOpcode::G_AND && "Must be G_AND");
+      // We cannot naturally negate an AND operation.
+      CanNegate = false;
+      MustBeFirst = MustBeFirstL || MustBeFirstR;
+    }
+    return true;
+  }
+  return false;
+}
+
+MachineInstr *AArch64InstructionSelector::emitConditionalComparison(
+    Register LHS, Register RHS, CmpInst::Predicate CC,
+    AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC,
+    MachineIRBuilder &MIB) const {
+  // TODO: emit CMN as an optimization.
+  LLT OpTy = MIB.getMRI()->getType(LHS);
+  unsigned CCmpOpc;
+  if (CmpInst::isIntPredicate(CC)) {
+    // Integer CCMP needs 32/64 bits; the FP path below also accepts 16 bits.
+    assert(OpTy.getSizeInBits() == 32 || OpTy.getSizeInBits() == 64);
+    CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWr : AArch64::CCMPXr;
+  } else {
+    switch (OpTy.getSizeInBits()) {
+    case 16:
+      CCmpOpc = AArch64::FCCMPHrr;
+      break;
+    case 32:
+      CCmpOpc = AArch64::FCCMPSrr;
+      break;
+    case 64:
+      CCmpOpc = AArch64::FCCMPDrr;
+      break;
+    default:
+      return nullptr; // No FCCMP opcode is chosen for any other width.
+    }
+  }
+  AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
+  unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
+  auto CCmp =
+      MIB.buildInstr(CCmpOpc, {}, {LHS, RHS}).addImm(NZCV).addImm(Predicate);
+  constrainSelectedInstRegOperands(*CCmp, TII, TRI, RBI);
+  return &*CCmp;
+}
+
+MachineInstr *AArch64InstructionSelector::emitConjunctionRec(
+    Register Val, AArch64CC::CondCode &OutCC, bool Negate, Register CCOp,
+    AArch64CC::CondCode Predicate, MachineIRBuilder &MIB) const {
+  // A compare is a tree leaf: emit it as a (conditional) comparison.
+  auto &MRI = *MIB.getMRI();
+  MachineInstr *ValDef = MRI.getVRegDef(Val);
+  unsigned Opcode = ValDef->getOpcode();
+  if (Opcode == TargetOpcode::G_TRUNC) {
+    // Look through a trunc.
+    Val = ValDef->getOperand(1).getReg();
+    ValDef = MRI.getVRegDef(Val);
+    Opcode = ValDef->getOpcode();
+  }
+  if (auto *Cmp = dyn_cast<GAnyCmp>(ValDef)) {
+    Register LHS = Cmp->getLHSReg();
+    Register RHS = Cmp->getRHSReg();
+    CmpInst::Predicate CC = Cmp->getCond();
+    if (Negate)
+      CC = CmpInst::getInversePredicate(CC);
+    if (isa<GICmp>(Cmp)) {
+      OutCC = changeICMPPredToAArch64CC(CC);
+    } else {
+      // Handle special FP cases.
+      AArch64CC::CondCode ExtraCC;
+      changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC);
+      // Some floating point conditions can't be tested with a single condition
+      // code. Construct an additional comparison in this case.
+      if (ExtraCC != AArch64CC::AL) {
+        MachineInstr *ExtraCmp;
+        if (!CCOp)
+          ExtraCmp = emitFPCompare(LHS, RHS, MIB, CC);
+        else
+          ExtraCmp =
+              emitConditionalComparison(LHS, RHS, CC, Predicate, ExtraCC, MIB);
+        CCOp = ExtraCmp->getOperand(0).getReg(); // NOTE(review): no null check.
+        Predicate = ExtraCC; // The main compare is then predicated on ExtraCC.
+      }
+    }
+
+    // Produce a normal comparison if we are first in the chain.
+    if (!CCOp) {
+      auto Dst = MRI.cloneVirtualRegister(LHS);
+      if (isa<GICmp>(Cmp))
+        return emitSUBS(Dst, Cmp->getOperand(2), Cmp->getOperand(3), MIB);
+      return emitFPCompare(Cmp->getOperand(2).getReg(),
+                           Cmp->getOperand(3).getReg(), MIB);
+    }
+    // Otherwise produce a ccmp.
+    return emitConditionalComparison(LHS, RHS, CC, Predicate, OutCC, MIB);
+  }
+  assert(MRI.hasOneNonDBGUse(Val) && "Valid conjunction/disjunction tree");
+
+  bool IsOR = Opcode == TargetOpcode::G_OR;
+
+  Register LHS = ValDef->getOperand(1).getReg();
+  bool CanNegateL;
+  bool MustBeFirstL;
+  bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR, MRI);
+  assert(ValidL && "Valid conjunction/disjunction tree");
+  (void)ValidL;
+
+  Register RHS = ValDef->getOperand(2).getReg();
+  bool CanNegateR;
+  bool MustBeFirstR;
+  bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR, MRI);
+  assert(ValidR && "Valid conjunction/disjunction tree");
+  (void)ValidR;
+
+  // Swap sub-tree that must come first to the right side.
+  if (MustBeFirstL) {
+    assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
+    std::swap(LHS, RHS);
+    std::swap(CanNegateL, CanNegateR);
+    std::swap(MustBeFirstL, MustBeFirstR);
+  }
+
+  bool NegateR;
+  bool NegateAfterR;
+  bool NegateL;
+  bool NegateAfterAll;
+  if (Opcode == TargetOpcode::G_OR) {
+    // Swap the sub-tree that we can negate naturally to the left.
+    if (!CanNegateL) {
+      assert(CanNegateR && "at least one side must be negatable");
+      assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
+      assert(!Negate);
+      std::swap(LHS, RHS);
+      NegateR = false;
+      NegateAfterR = true;
+    } else {
+      // Negate the right sub-tree if possible, otherwise negate its result.
+      NegateR = CanNegateR;
+      NegateAfterR = !CanNegateR;
+    }
+    NegateL = true;
+    NegateAfterAll = !Negate;
+  } else {
+    assert(Opcode == TargetOpcode::G_AND &&
+           "Valid conjunction/disjunction tree");
+    assert(!Negate && "Valid conjunction/disjunction tree");
+
+    NegateL = false;
+    NegateR = false;
+    NegateAfterR = false;
+    NegateAfterAll = false;
+  }
+
+  // Emit sub-trees: the right one first, then the left chained onto it.
+  AArch64CC::CondCode RHSCC;
+  MachineInstr *CmpR =
+      emitConjunctionRec(RHS, RHSCC, NegateR, CCOp, Predicate, MIB);
+  if (NegateAfterR)
+    RHSCC = AArch64CC::getInvertedCondCode(RHSCC);
+  MachineInstr *CmpL = emitConjunctionRec(
+      LHS, OutCC, NegateL, CmpR->getOperand(0).getReg(), RHSCC, MIB);
+  if (NegateAfterAll)
+    OutCC = AArch64CC::getInvertedCondCode(OutCC);
+  return CmpL;
+}
+
+MachineInstr *AArch64InstructionSelector::emitConjunction(
+    Register Val, AArch64CC::CondCode &OutCC, MachineIRBuilder &MIB) const {
+  bool DummyCanNegate; // Out-params of the check below; not used here.
+  bool DummyMustBeFirst;
+  if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false,
+                          *MIB.getMRI()))
+    return nullptr; // Val is not a supported AND/OR/compare tree.
+  return emitConjunctionRec(Val, OutCC, false, Register(), AArch64CC::AL, MIB);
+}
+
+bool AArch64InstructionSelector::tryOptSelectConjunction(GSelect &SelI,
+                                                         MachineInstr &CondMI) {
+  // Only erase the G_SELECT once both the ccmp chain and the select exist.
+  AArch64CC::CondCode CC;
+  if (!emitConjunction(SelI.getCondReg(), CC, MIB) ||
+      !emitSelect(SelI.getReg(0), SelI.getTrueReg(), SelI.getFalseReg(), CC,
+                  MIB))
+    return false;
+  SelI.eraseFromParent();
+  return true;
+}
+
+bool AArch64InstructionSelector::tryOptSelect(GSelect &I) {
   MachineRegisterInfo &MRI = *MIB.getMRI();
   // We want to recognize this pattern:
   //
@@ -4755,8 +5106,11 @@ bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) {
     return false;
 
   unsigned CondOpc = CondDef->getOpcode();
-  if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP)
+  if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP) {
+    if (tryOptSelectConjunction(I, *CondDef))
+      return true;
     return false;
+  }
 
   AArch64CC::CondCode CondCode;
   if (CondOpc == TargetOpcode::G_ICMP) {

diff --git a/llvm/test/CodeGen/AArch64/arm64-ccmp.ll b/llvm/test/CodeGen/AArch64/arm64-ccmp.ll
index f81ed69b137f6..d1430096e0c22 100644
--- a/llvm/test/CodeGen/AArch64/arm64-ccmp.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-ccmp.ll
@@ -569,14 +569,10 @@ define i64 @select_and(i32 %w0, i32 %w1, i64 %x2, i64 %x3) {
 ;
 ; GISEL-LABEL: select_and:
 ; GISEL:       ; %bb.0:
-; GISEL-NEXT:    cmp w0, w1
-; GISEL-NEXT:    cset w8, lt
-; GISEL-NEXT:    mov w9, #5
-; GISEL-NEXT:    cmp w9, w1
-; GISEL-NEXT:    cset w9, ne
-; GISEL-NEXT:    and w8, w8, w9
-; GISEL-NEXT:    tst w8, #0x1
-; GISEL-NEXT:    csel x0, x2, x3, ne
+; GISEL-NEXT:    mov w8, #5
+; GISEL-NEXT:    cmp w8, w1
+; GISEL-NEXT:    ccmp w0, w1, #0, ne
+; GISEL-NEXT:    csel x0, x2, x3, lt
 ; GISEL-NEXT:    ret
   %1 = icmp slt i32 %w0, %w1
   %2 = icmp ne i32 5, %w1
@@ -595,14 +591,10 @@ define i64 @select_or(i32 %w0, i32 %w1, i64 %x2, i64 %x3) {
 ;
 ; GISEL-LABEL: select_or:
 ; GISEL:       ; %bb.0:
-; GISEL-NEXT:    cmp w0, w1
-; GISEL-NEXT:    cset w8, lt
-; GISEL-NEXT:    mov w9, #5
-; GISEL-NEXT:    cmp w9, w1
-; GISEL-NEXT:    cset w9, ne
-; GISEL-NEXT:    orr w8, w8, w9
-; GISEL-NEXT:    tst w8, #0x1
-; GISEL-NEXT:    csel x0, x2, x3, ne
+; GISEL-NEXT:    mov w8, #5
+; GISEL-NEXT:    cmp w8, w1
+; GISEL-NEXT:    ccmp w0, w1, #8, eq
+; GISEL-NEXT:    csel x0, x2, x3, lt
 ; GISEL-NEXT:    ret
   %1 = icmp slt i32 %w0, %w1
   %2 = icmp ne i32 5, %w1
@@ -611,6 +603,28 @@ define i64 @select_or(i32 %w0, i32 %w1, i64 %x2, i64 %x3) {
   ret i64 %sel
 }
 
+define float @select_or_float(i32 %w0, i32 %w1, float %x2, float %x3) {
+; CHECK-LABEL: select_or_float:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    cmp w1, #5
+; CHECK-NEXT:    ccmp w0, w1, #8, eq
+; CHECK-NEXT:    fcsel s0, s0, s1, lt
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: select_or_float:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    mov w8, #5
+; GISEL-NEXT:    cmp w8, w1
+; GISEL-NEXT:    ccmp w0, w1, #8, eq
+; GISEL-NEXT:    fcsel s0, s0, s1, lt
+; GISEL-NEXT:    ret
+  %1 = icmp slt i32 %w0, %w1
+  %2 = icmp ne i32 5, %w1
+  %3 = or i1 %1, %2
+  %sel = select i1 %3, float %x2,float %x3
+  ret float %sel
+}
+
 define i64 @gccbug(i64 %x0, i64 %x1) {
 ; CHECK-LABEL: gccbug:
 ; CHECK:       ; %bb.0:
@@ -623,17 +637,12 @@ define i64 @gccbug(i64 %x0, i64 %x1) {
 ;
 ; GISEL-LABEL: gccbug:
 ; GISEL:       ; %bb.0:
-; GISEL-NEXT:    cmp x1, #0
-; GISEL-NEXT:    cset w8, eq
-; GISEL-NEXT:    mov w9, #2
+; GISEL-NEXT:    mov w8, #2
+; GISEL-NEXT:    mov w9, #4
 ; GISEL-NEXT:    cmp x0, #2
-; GISEL-NEXT:    cset w10, eq
-; GISEL-NEXT:    cmp x0, #4
-; GISEL-NEXT:    cset w11, eq
-; GISEL-NEXT:    orr w10, w11, w10
-; GISEL-NEXT:    and w8, w10, w8
-; GISEL-NEXT:    tst w8, #0x1
-; GISEL-NEXT:    csinc x0, x9, xzr, ne
+; GISEL-NEXT:    ccmp x0, x9, #4, ne
+; GISEL-NEXT:    ccmp x1, xzr, #0, eq
+; GISEL-NEXT:    csinc x0, x8, xzr, eq
 ; GISEL-NEXT:    ret
   %cmp0 = icmp eq i64 %x1, 0
   %cmp1 = icmp eq i64 %x0, 2
@@ -658,19 +667,13 @@ define i32 @select_ororand(i32 %w0, i32 %w1, i32 %w2, i32 %w3) {
 ;
 ; GISEL-LABEL: select_ororand:
 ; GISEL:       ; %bb.0:
-; GISEL-NEXT:    cmp w0, #0
-; GISEL-NEXT:    cset w8, eq
-; GISEL-NEXT:    cmp w1, #13
-; GISEL-NEXT:    cset w9, hi
-; GISEL-NEXT:    cmp w2, #2
-; GISEL-NEXT:    cset w10, lt
+; GISEL-NEXT:    mov w8, #13
+; GISEL-NEXT:    mov w9, #2
 ; GISEL-NEXT:    cmp w3, #4
-; GISEL-NEXT:    cset w11, gt
-; GISEL-NEXT:    orr w8, w8, w9
-; GISEL-NEXT:    and w9, w10, w11
-; GISEL-NEXT:    orr w8, w8, w9
-; GISEL-NEXT:    tst w8, #0x1
-; GISEL-NEXT:    csel w0, w3, wzr, ne
+; GISEL-NEXT:    ccmp w2, w9, #0, gt
+; GISEL-NEXT:    ccmp w1, w8, #2, ge
+; GISEL-NEXT:    ccmp w0, wzr, #4, ls
+; GISEL-NEXT:    csel w0, w3, wzr, eq
 ; GISEL-NEXT:    ret
   %c0 = icmp eq i32 %w0, 0
   %c1 = icmp ugt i32 %w1, 13
@@ -694,16 +697,10 @@ define i32 @select_andor(i32 %v1, i32 %v2, i32 %v3) {
 ;
 ; GISEL-LABEL: select_andor:
 ; GISEL:       ; %bb.0:
-; GISEL-NEXT:    cmp w0, w1
-; GISEL-NEXT:    cset w8, eq
 ; GISEL-NEXT:    cmp w1, w2
-; GISEL-NEXT:    cset w9, ge
-; GISEL-NEXT:    cmp w0, #0
-; GISEL-NEXT:    cset w10, eq
-; GISEL-NEXT:    orr w9, w10, w9
-; GISEL-NEXT:    and w8, w9, w8
-; GISEL-NEXT:    tst w8, #0x1
-; GISEL-NEXT:    csel w0, w0, w1, ne
+; GISEL-NEXT:    ccmp w0, wzr, #4, lt
+; GISEL-NEXT:    ccmp w0, w1, #0, eq
+; GISEL-NEXT:    csel w0, w0, w1, eq
 ; GISEL-NEXT:    ret
   %c0 = icmp eq i32 %v1, %v2
   %c1 = icmp sge i32 %v2, %v3
@@ -872,14 +869,9 @@ define i32 @select_and_olt_one(double %v0, double %v1, double %v2, double %v3, i
 ; GISEL-LABEL: select_and_olt_one:
 ; GISEL:       ; %bb.0:
 ; GISEL-NEXT:    fcmp d0, d1
-; GISEL-NEXT:    cset w8, mi
-; GISEL-NEXT:    fcmp d2, d3
-; GISEL-NEXT:    cset w9, mi
-; GISEL-NEXT:    cset w10, gt
-; GISEL-NEXT:    orr w9, w9, w10
-; GISEL-NEXT:    and w8, w9, w8
-; GISEL-NEXT:    tst w8, #0x1
-; GISEL-NEXT:    csel w0, w0, w1, ne
+; GISEL-NEXT:    fccmp d2, d3, #4, mi
+; GISEL-NEXT:    fccmp d2, d3, #1, ne
+; GISEL-NEXT:    csel w0, w0, w1, vc
 ; GISEL-NEXT:    ret
   %c0 = fcmp olt double %v0, %v1
   %c1 = fcmp one double %v2, %v3
@@ -900,14 +892,9 @@ define i32 @select_and_one_olt(double %v0, double %v1, double %v2, double %v3, i
 ; GISEL-LABEL: select_and_one_olt:
 ; GISEL:       ; %bb.0:
 ; GISEL-NEXT:    fcmp d0, d1
-; GISEL-NEXT:    cset w8, mi
-; GISEL-NEXT:    cset w9, gt
-; GISEL-NEXT:    orr w8, w8, w9
-; GISEL-NEXT:    fcmp d2, d3
-; GISEL-NEXT:    cset w9, mi
-; GISEL-NEXT:    and w8, w9, w8
-; GISEL-NEXT:    tst w8, #0x1
-; GISEL-NEXT:    csel w0, w0, w1, ne
+; GISEL-NEXT:    fccmp d0, d1, #1, ne
+; GISEL-NEXT:    fccmp d2, d3, #0, vc
+; GISEL-NEXT:    csel w0, w0, w1, mi
 ; GISEL-NEXT:    ret
   %c0 = fcmp one double %v0, %v1
   %c1 = fcmp olt double %v2, %v3
@@ -928,14 +915,9 @@ define i32 @select_and_olt_ueq(double %v0, double %v1, double %v2, double %v3, i
 ; GISEL-LABEL: select_and_olt_ueq:
 ; GISEL:       ; %bb.0:
 ; GISEL-NEXT:    fcmp d0, d1
-; GISEL-NEXT:    cset w8, mi
-; GISEL-NEXT:    fcmp d2, d3
-; GISEL-NEXT:    cset w9, eq
-; GISEL-NEXT:    cset w10, vs
-; GISEL-NEXT:    orr w9, w9, w10
-; GISEL-NEXT:    and w8, w9, w8
-; GISEL-NEXT:    tst w8, #0x1
-; GISEL-NEXT:    csel w0, w0, w1, ne
+; GISEL-NEXT:    fccmp d2, d3, #0, mi
+; GISEL-NEXT:    fccmp d2, d3, #8, le
+; GISEL-NEXT:    csel w0, w0, w1, pl
 ; GISEL-NEXT:    ret
   %c0 = fcmp olt double %v0, %v1
   %c1 = fcmp ueq double %v2, %v3
@@ -956,14 +938,9 @@ define i32 @select_and_ueq_olt(double %v0, double %v1, double %v2, double %v3, i
 ; GISEL-LABEL: select_and_ueq_olt:
 ; GISEL:       ; %bb.0:
 ; GISEL-NEXT:    fcmp d0, d1
-; GISEL-NEXT:    cset w8, eq
-; GISEL-NEXT:    cset w9, vs
-; GISEL-NEXT:    orr w8, w8, w9
-; GISEL-NEXT:    fcmp d2, d3
-; GISEL-NEXT:    cset w9, mi
-; GISEL-NEXT:    and w8, w9, w8
-; GISEL-NEXT:    tst w8, #0x1
-; GISEL-NEXT:    csel w0, w0, w1, ne
+; GISEL-NEXT:    fccmp d0, d1, #8, le
+; GISEL-NEXT:    fccmp d2, d3, #0, pl
+; GISEL-NEXT:    csel w0, w0, w1, mi
 ; GISEL-NEXT:    ret
   %c0 = fcmp ueq double %v0, %v1
   %c1 = fcmp olt double %v2, %v3
@@ -984,14 +961,9 @@ define i32 @select_or_olt_one(double %v0, double %v1, double %v2, double %v3, i3
 ; GISEL-LABEL: select_or_olt_one:
 ; GISEL:       ; %bb.0:
 ; GISEL-NEXT:    fcmp d0, d1
-; GISEL-NEXT:    cset w8, mi
-; GISEL-NEXT:    fcmp d2, d3
-; GISEL-NEXT:    cset w9, mi
-; GISEL-NEXT:    cset w10, gt
-; GISEL-NEXT:    orr w9, w9, w10
-; GISEL-NEXT:    orr w8, w9, w8
-; GISEL-NEXT:    tst w8, #0x1
-; GISEL-NEXT:    csel w0, w0, w1, ne
+; GISEL-NEXT:    fccmp d2, d3, #0, pl
+; GISEL-NEXT:    fccmp d2, d3, #8, le
+; GISEL-NEXT:    csel w0, w0, w1, mi
 ; GISEL-NEXT:    ret
   %c0 = fcmp olt double %v0, %v1
   %c1 = fcmp one double %v2, %v3
@@ -1012,14 +984,9 @@ define i32 @select_or_one_olt(double %v0, double %v1, double %v2, double %v3, i3
 ; GISEL-LABEL: select_or_one_olt:
 ; GISEL:       ; %bb.0:
 ; GISEL-NEXT:    fcmp d0, d1
-; GISEL-NEXT:    cset w8, mi
-; GISEL-NEXT:    cset w9, gt
-; GISEL-NEXT:    orr w8, w8, w9
-; GISEL-NEXT:    fcmp d2, d3
-; GISEL-NEXT:    cset w9, mi
-; GISEL-NEXT:    orr w8, w9, w8
-; GISEL-NEXT:    tst w8, #0x1
-; GISEL-NEXT:    csel w0, w0, w1, ne
+; GISEL-NEXT:    fccmp d0, d1, #8, le
+; GISEL-NEXT:    fccmp d2, d3, #8, pl
+; GISEL-NEXT:    csel w0, w0, w1, mi
 ; GISEL-NEXT:    ret
   %c0 = fcmp one double %v0, %v1
   %c1 = fcmp olt double %v2, %v3
@@ -1040,14 +1007,9 @@ define i32 @select_or_olt_ueq(double %v0, double %v1, double %v2, double %v3, i3
 ; GISEL-LABEL: select_or_olt_ueq:
 ; GISEL:       ; %bb.0:
 ; GISEL-NEXT:    fcmp d0, d1
-; GISEL-NEXT:    cset w8, mi
-; GISEL-NEXT:    fcmp d2, d3
-; GISEL-NEXT:    cset w9, eq
-; GISEL-NEXT:    cset w10, vs
-; GISEL-NEXT:    orr w9, w9, w10
-; GISEL-NEXT:    orr w8, w9, w8
-; GISEL-NEXT:    tst w8, #0x1
-; GISEL-NEXT:    csel w0, w0, w1, ne
+; GISEL-NEXT:    fccmp d2, d3, #4, pl
+; GISEL-NEXT:    fccmp d2, d3, #1, ne
+; GISEL-NEXT:    csel w0, w0, w1, vs
 ; GISEL-NEXT:    ret
   %c0 = fcmp olt double %v0, %v1
   %c1 = fcmp ueq double %v2, %v3
@@ -1068,14 +1030,9 @@ define i32 @select_or_ueq_olt(double %v0, double %v1, double %v2, double %v3, i3
 ; GISEL-LABEL: select_or_ueq_olt:
 ; GISEL:       ; %bb.0:
 ; GISEL-NEXT:    fcmp d0, d1
-; GISEL-NEXT:    cset w8, eq
-; GISEL-NEXT:    cset w9, vs
-; GISEL-NEXT:    orr w8, w8, w9
-; GISEL-NEXT:    fcmp d2, d3
-; GISEL-NEXT:    cset w9, mi
-; GISEL-NEXT:    orr w8, w9, w8
-; GISEL-NEXT:    tst w8, #0x1
-; GISEL-NEXT:    csel w0, w0, w1, ne
+; GISEL-NEXT:    fccmp d0, d1, #1, ne
+; GISEL-NEXT:    fccmp d2, d3, #8, vc
+; GISEL-NEXT:    csel w0, w0, w1, mi
 ; GISEL-NEXT:    ret
   %c0 = fcmp ueq double %v0, %v1
   %c1 = fcmp olt double %v2, %v3
@@ -1097,17 +1054,10 @@ define i32 @select_or_olt_ogt_ueq(double %v0, double %v1, double %v2, double %v3
 ; GISEL-LABEL: select_or_olt_ogt_ueq:
 ; GISEL:       ; %bb.0:
 ; GISEL-NEXT:    fcmp d0, d1
-; GISEL-NEXT:    cset w8, mi
-; GISEL-NEXT:    fcmp d2, d3
-; GISEL-NEXT:    cset w9, gt
-; GISEL-NEXT:    fcmp d4, d5
-; GISEL-NEXT:    cset w10, eq
-; GISEL-NEXT:    cset w11, vs
-; GISEL-NEXT:    orr w10, w10, w11
-; GISEL-NEXT:    orr w8, w9, w8
-; GISEL-NEXT:    orr w8, w10, w8
-; GISEL-NEXT:    tst w8, #0x1
-; GISEL-NEXT:    csel w0, w0, w1, ne
+; GISEL-NEXT:    fccmp d2, d3, #0, pl
+; GISEL-NEXT:    fccmp d4, d5, #4, le
+; GISEL-NEXT:    fccmp d4, d5, #1, ne
+; GISEL-NEXT:    csel w0, w0, w1, vs
 ; GISEL-NEXT:    ret
   %c0 = fcmp olt double %v0, %v1
   %c1 = fcmp ogt double %v2, %v3
@@ -1131,17 +1081,10 @@ define i32 @select_or_olt_ueq_ogt(double %v0, double %v1, double %v2, double %v3
 ; GISEL-LABEL: select_or_olt_ueq_ogt:
 ; GISEL:       ; %bb.0:
 ; GISEL-NEXT:    fcmp d0, d1
-; GISEL-NEXT:    cset w8, mi
-; GISEL-NEXT:    fcmp d2, d3
-; GISEL-NEXT:    cset w9, eq
-; GISEL-NEXT:    cset w10, vs
-; GISEL-NEXT:    orr w9, w9, w10
-; GISEL-NEXT:    fcmp d4, d5
-; GISEL-NEXT:    cset w10, gt
-; GISEL-NEXT:    orr w8, w9, w8
-; GISEL-NEXT:    orr w8, w10, w8
-; GISEL-NEXT:    tst w8, #0x1
-; GISEL-NEXT:    csel w0, w0, w1, ne
+; GISEL-NEXT:    fccmp d2, d3, #4, pl
+; GISEL-NEXT:    fccmp d2, d3, #1, ne
+; GISEL-NEXT:    fccmp d4, d5, #0, vc
+; GISEL-NEXT:    csel w0, w0, w1, gt
 ; GISEL-NEXT:    ret
   %c0 = fcmp olt double %v0, %v1
   %c1 = fcmp ueq double %v2, %v3
@@ -1170,15 +1113,11 @@ define i32 @half_select_and_olt_oge(half %v0, half %v1, half %v2, half %v3, i32
 ; GISEL:       ; %bb.0:
 ; GISEL-NEXT:    fcvt s0, h0
 ; GISEL-NEXT:    fcvt s1, h1
+; GISEL-NEXT:    fcvt s2, h2
+; GISEL-NEXT:    fcvt s3, h3
 ; GISEL-NEXT:    fcmp s0, s1
-; GISEL-NEXT:    cset w8, mi
-; GISEL-NEXT:    fcvt s0, h2
-; GISEL-NEXT:    fcvt s1, h3
-; GISEL-NEXT:    fcmp s0, s1
-; GISEL-NEXT:    cset w9, ge
-; GISEL-NEXT:    and w8, w9, w8
-; GISEL-NEXT:    tst w8, #0x1
-; GISEL-NEXT:    csel w0, w0, w1, ne
+; GISEL-NEXT:    fccmp s2, s3, #8, mi
+; GISEL-NEXT:    csel w0, w0, w1, ge
 ; GISEL-NEXT:    ret
   %c0 = fcmp olt half %v0, %v1
   %c1 = fcmp oge half %v2, %v3
@@ -1204,17 +1143,12 @@ define i32 @half_select_and_olt_one(half %v0, half %v1, half %v2, half %v3, i32
 ; GISEL:       ; %bb.0:
 ; GISEL-NEXT:    fcvt s0, h0
 ; GISEL-NEXT:    fcvt s1, h1
+; GISEL-NEXT:    fcvt s2, h2
+; GISEL-NEXT:    fcvt s3, h3
 ; GISEL-NEXT:    fcmp s0, s1
-; GISEL-NEXT:    cset w8, mi
-; GISEL-NEXT:    fcvt s0, h2
-; GISEL-NEXT:    fcvt s1, h3
-; GISEL-NEXT:    fcmp s0, s1
-; GISEL-NEXT:    cset w9, mi
-; GISEL-NEXT:    cset w10, gt
-; GISEL-NEXT:    orr w9, w9, w10
-; GISEL-NEXT:    and w8, w9, w8
-; GISEL-NEXT:    tst w8, #0x1
-; GISEL-NEXT:    csel w0, w0, w1, ne
+; GISEL-NEXT:    fccmp s2, s3, #4, mi
+; GISEL-NEXT:    fccmp s2, s3, #1, ne
+; GISEL-NEXT:    csel w0, w0, w1, vc
 ; GISEL-NEXT:    ret
   %c0 = fcmp olt half %v0, %v1
   %c1 = fcmp one half %v2, %v3
@@ -1294,18 +1228,11 @@ define i32 @deep_or(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %x, i32 %y) {
 ;
 ; GISEL-LABEL: deep_or:
 ; GISEL:       ; %bb.0:
-; GISEL-NEXT:    cmp w0, #0
-; GISEL-NEXT:    cset w8, ne
-; GISEL-NEXT:    cmp w1, #0
-; GISEL-NEXT:    cset w9, ne
-; GISEL-NEXT:    cmp w2, #15
-; GISEL-NEXT:    cset w10, eq
+; GISEL-NEXT:    mov w8, #15
 ; GISEL-NEXT:    cmp w2, #20
-; GISEL-NEXT:    cset w11, eq
-; GISEL-NEXT:    orr w10, w10, w11
-; GISEL-NEXT:    and w9, w10, w9
-; GISEL-NEXT:    and w8, w9, w8
-; GISEL-NEXT:    tst w8, #0x1
+; GISEL-NEXT:    ccmp w2, w8, #4, ne
+; GISEL-NEXT:    ccmp w1, wzr, #4, eq
+; GISEL-NEXT:    ccmp w0, wzr, #4, ne
 ; GISEL-NEXT:    csel w0, w4, w5, ne
 ; GISEL-NEXT:    ret
   %c0 = icmp ne i32 %a0, 0
@@ -1333,18 +1260,11 @@ define i32 @deep_or1(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %x, i32 %y) {
 ;
 ; GISEL-LABEL: deep_or1:
 ; GISEL:       ; %bb.0:
-; GISEL-NEXT:    cmp w0, #0
-; GISEL-NEXT:    cset w8, ne
-; GISEL-NEXT:    cmp w1, #0
-; GISEL-NEXT:    cset w9, ne
-; GISEL-NEXT:    cmp w2, #15
-; GISEL-NEXT:    cset w10, eq
+; GISEL-NEXT:    mov w8, #15
 ; GISEL-NEXT:    cmp w2, #20
-; GISEL-NEXT:    cset w11, eq
-; GISEL-NEXT:    orr w10, w10, w11
-; GISEL-NEXT:    and w8, w8, w10
-; GISEL-NEXT:    and w8, w8, w9
-; GISEL-NEXT:    tst w8, #0x1
+; GISEL-NEXT:    ccmp w2, w8, #4, ne
+; GISEL-NEXT:    ccmp w0, wzr, #4, eq
+; GISEL-NEXT:    ccmp w1, wzr, #4, ne
 ; GISEL-NEXT:    csel w0, w4, w5, ne
 ; GISEL-NEXT:    ret
   %c0 = icmp ne i32 %a0, 0
@@ -1372,18 +1292,11 @@ define i32 @deep_or2(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %x, i32 %y) {
 ;
 ; GISEL-LABEL: deep_or2:
 ; GISEL:       ; %bb.0:
-; GISEL-NEXT:    cmp w0, #0
-; GISEL-NEXT:    cset w8, ne
-; GISEL-NEXT:    cmp w1, #0
-; GISEL-NEXT:    cset w9, ne
-; GISEL-NEXT:    cmp w2, #15
-; GISEL-NEXT:    cset w10, eq
+; GISEL-NEXT:    mov w8, #15
 ; GISEL-NEXT:    cmp w2, #20
-; GISEL-NEXT:    cset w11, eq
-; GISEL-NEXT:    orr w10, w10, w11
-; GISEL-NEXT:    and w8, w8, w9
-; GISEL-NEXT:    and w8, w8, w10
-; GISEL-NEXT:    tst w8, #0x1
+; GISEL-NEXT:    ccmp w2, w8, #4, ne
+; GISEL-NEXT:    ccmp w1, wzr, #4, eq
+; GISEL-NEXT:    ccmp w0, wzr, #4, ne
 ; GISEL-NEXT:    csel w0, w4, w5, ne
 ; GISEL-NEXT:    ret
   %c0 = icmp ne i32 %a0, 0