[llvm] 9745dce - [SelectionDAG][AArch64][SVE] Perform SETCC condition legalization in LegalizeVectorOps
Bradley Smith via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 29 07:42:43 PDT 2021
Author: Bradley Smith
Date: 2021-03-29T15:32:25+01:00
New Revision: 9745dce8c3dc6996dae9bab11ff410a12d8daec7
URL: https://github.com/llvm/llvm-project/commit/9745dce8c3dc6996dae9bab11ff410a12d8daec7
DIFF: https://github.com/llvm/llvm-project/commit/9745dce8c3dc6996dae9bab11ff410a12d8daec7.diff
LOG: [SelectionDAG][AArch64][SVE] Perform SETCC condition legalization in LegalizeVectorOps
This is currently performed in SelectionDAGLegalize; here we make it also
happen in LegalizeVectorOps, allowing a target to lower the SETCC condition
codes first in LegalizeVectorOps and then lower to a custom node afterwards,
without having to duplicate all of the SETCC condition legalization in the
target-specific lowering.
As a result of this, fixed-length floating-point SETCC nodes can now be
properly lowered for SVE.
Differential Revision: https://reviews.llvm.org/D98939
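For illustration, the target-side pattern this change enables looks roughly
like the following. This is a hand-written sketch, not code from the patch:
MyTargetLowering and addTypeForFixedLengthVector are hypothetical names,
while setCondCodeAction and setOperationAction are the real TargetLoweringBase
hooks used by the AArch64 change below.

    void MyTargetLowering::addTypeForFixedLengthVector(MVT VT) {
      if (VT.isFloatingPoint()) {
        // Ask generic LegalizeVectorOps to rewrite these condition codes
        // (by swapping operands, inverting the code, or splitting into an
        // AND/OR of two SETCCs) before the node reaches the target.
        setCondCodeAction(ISD::SETUEQ, VT, Expand);
        setCondCodeAction(ISD::SETONE, VT, Expand);
      }
      // Only SETCC nodes with legal condition codes arrive here.
      setOperationAction(ISD::SETCC, VT, Custom);
    }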
Added:
llvm/test/CodeGen/AArch64/sve-fixed-length-float-compares.ll
Modified:
llvm/include/llvm/CodeGen/TargetLowering.h
llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
llvm/lib/Target/AArch64/SVEInstrFormats.td
llvm/test/CodeGen/PowerPC/vsx.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-setcc.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 793acb6263d2..4b964dc26218 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -4513,6 +4513,29 @@ class TargetLowering : public TargetLoweringBase {
/// method accepts vectors as its arguments.
SDValue expandVectorSplice(SDNode *Node, SelectionDAG &DAG) const;
+ /// Legalize a SETCC with given LHS and RHS and condition code CC on the
+ /// current target.
+ ///
+ /// If the SETCC has been legalized using AND / OR, then the legalized node
+ /// will be stored in LHS. RHS and CC will be set to SDValue(). NeedInvert
+ /// will be set to false.
+ ///
+ /// If the SETCC has been legalized by using getSetCCSwappedOperands(),
+ /// then the values of LHS and RHS will be swapped, CC will be set to the
+ /// new condition, and NeedInvert will be set to false.
+ ///
+ /// If the SETCC has been legalized using the inverse condcode, then LHS and
+ /// RHS will be unchanged, CC will be set to the inverted condcode, and
+ /// NeedInvert will be set to true. The caller must invert the result of the
+ /// SETCC with SelectionDAG::getLogicalNOT() or take equivalent action to swap
+ /// the effect of a true/false result.
+ ///
+ /// \returns true if the SetCC has been legalized, false if it hasn't.
+ bool LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT, SDValue &LHS,
+ SDValue &RHS, SDValue &CC, bool &NeedInvert,
+ const SDLoc &dl, SDValue &Chain,
+ bool IsSignaling = false) const;
+
//===--------------------------------------------------------------------===//
// Instruction Emitting Hooks
//
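Since the hook communicates its result through its reference parameters, a
caller is expected to rebuild the node afterwards. A minimal sketch of the
consuming side, mirroring how the LegalizeVectorOps caller later in this
patch uses it (N, DAG and TLI are assumed to be the SETCC node, the
SelectionDAG and the TargetLowering already in scope):

    SDValue LHS = N->getOperand(0), RHS = N->getOperand(1), CC = N->getOperand(2);
    SDValue Chain; // a plain SETCC has no chain
    bool NeedInvert;
    if (TLI.LegalizeSetCCCondCode(DAG, N->getValueType(0), LHS, RHS, CC,
                                  NeedInvert, SDLoc(N), Chain)) {
      // Swapped operands and/or a new condition code: re-emit the SETCC.
      // If CC is null, the AND/OR expansion already left the result in LHS.
      if (CC.getNode())
        LHS = DAG.getNode(ISD::SETCC, SDLoc(N), N->getValueType(0), LHS, RHS, CC);
      // Inverted condition code: undo the inversion on the result.
      if (NeedInvert)
        LHS = DAG.getLogicalNOT(SDLoc(N), LHS, LHS.getValueType());
    }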
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 1a131bf32ef1..7877b8ca3a13 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -133,10 +133,6 @@ class SelectionDAGLegalize {
SDValue N1, SDValue N2,
ArrayRef<int> Mask) const;
- bool LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC,
- bool &NeedInvert, const SDLoc &dl, SDValue &Chain,
- bool IsSignaling = false);
-
SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned);
void ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32,
@@ -1685,152 +1681,6 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node,
Results.push_back(Tmp2);
}
-/// Legalize a SETCC with given LHS and RHS and condition code CC on the current
-/// target.
-///
-/// If the SETCC has been legalized using AND / OR, then the legalized node
-/// will be stored in LHS. RHS and CC will be set to SDValue(). NeedInvert
-/// will be set to false.
-///
-/// If the SETCC has been legalized by using getSetCCSwappedOperands(),
-/// then the values of LHS and RHS will be swapped, CC will be set to the
-/// new condition, and NeedInvert will be set to false.
-///
-/// If the SETCC has been legalized using the inverse condcode, then LHS and
-/// RHS will be unchanged, CC will set to the inverted condcode, and NeedInvert
-/// will be set to true. The caller must invert the result of the SETCC with
-/// SelectionDAG::getLogicalNOT() or take equivalent action to swap the effect
-/// of a true/false result.
-///
-/// \returns true if the SetCC has been legalized, false if it hasn't.
-bool SelectionDAGLegalize::LegalizeSetCCCondCode(
- EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, bool &NeedInvert,
- const SDLoc &dl, SDValue &Chain, bool IsSignaling) {
- MVT OpVT = LHS.getSimpleValueType();
- ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
- NeedInvert = false;
- switch (TLI.getCondCodeAction(CCCode, OpVT)) {
- default: llvm_unreachable("Unknown condition code action!");
- case TargetLowering::Legal:
- // Nothing to do.
- break;
- case TargetLowering::Expand: {
- ISD::CondCode InvCC = ISD::getSetCCSwappedOperands(CCCode);
- if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
- std::swap(LHS, RHS);
- CC = DAG.getCondCode(InvCC);
- return true;
- }
- // Swapping operands didn't work. Try inverting the condition.
- bool NeedSwap = false;
- InvCC = getSetCCInverse(CCCode, OpVT);
- if (!TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
- // If inverting the condition is not enough, try swapping operands
- // on top of it.
- InvCC = ISD::getSetCCSwappedOperands(InvCC);
- NeedSwap = true;
- }
- if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
- CC = DAG.getCondCode(InvCC);
- NeedInvert = true;
- if (NeedSwap)
- std::swap(LHS, RHS);
- return true;
- }
-
- ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
- unsigned Opc = 0;
- switch (CCCode) {
- default: llvm_unreachable("Don't know how to expand this condition!");
- case ISD::SETUO:
- if (TLI.isCondCodeLegal(ISD::SETUNE, OpVT)) {
- CC1 = ISD::SETUNE; CC2 = ISD::SETUNE; Opc = ISD::OR;
- break;
- }
- assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) &&
- "If SETUE is expanded, SETOEQ or SETUNE must be legal!");
- NeedInvert = true;
- LLVM_FALLTHROUGH;
- case ISD::SETO:
- assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT)
- && "If SETO is expanded, SETOEQ must be legal!");
- CC1 = ISD::SETOEQ; CC2 = ISD::SETOEQ; Opc = ISD::AND; break;
- case ISD::SETONE:
- case ISD::SETUEQ:
- // If the SETUO or SETO CC isn't legal, we might be able to use
- // SETOGT || SETOLT, inverting the result for SETUEQ. We only need one
- // of SETOGT/SETOLT to be legal, the other can be emulated by swapping
- // the operands.
- CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
- if (!TLI.isCondCodeLegal(CC2, OpVT) &&
- (TLI.isCondCodeLegal(ISD::SETOGT, OpVT) ||
- TLI.isCondCodeLegal(ISD::SETOLT, OpVT))) {
- CC1 = ISD::SETOGT;
- CC2 = ISD::SETOLT;
- Opc = ISD::OR;
- NeedInvert = ((unsigned)CCCode & 0x8U);
- break;
- }
- LLVM_FALLTHROUGH;
- case ISD::SETOEQ:
- case ISD::SETOGT:
- case ISD::SETOGE:
- case ISD::SETOLT:
- case ISD::SETOLE:
- case ISD::SETUNE:
- case ISD::SETUGT:
- case ISD::SETUGE:
- case ISD::SETULT:
- case ISD::SETULE:
- // If we are floating point, assign and break, otherwise fall through.
- if (!OpVT.isInteger()) {
- // We can use the 4th bit to tell if we are the unordered
- // or ordered version of the opcode.
- CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
- Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
- CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
- break;
- }
- // Fallthrough if we are unsigned integer.
- LLVM_FALLTHROUGH;
- case ISD::SETLE:
- case ISD::SETGT:
- case ISD::SETGE:
- case ISD::SETLT:
- case ISD::SETNE:
- case ISD::SETEQ:
- // If all combinations of inverting the condition and swapping operands
- // didn't work then we have no means to expand the condition.
- llvm_unreachable("Don't know how to expand this condition!");
- }
-
- SDValue SetCC1, SetCC2;
- if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
- // If we aren't the ordered or unorder operation,
- // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
- SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain,
- IsSignaling);
- SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain,
- IsSignaling);
- } else {
- // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
- SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain,
- IsSignaling);
- SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain,
- IsSignaling);
- }
- if (Chain)
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1),
- SetCC2.getValue(1));
- LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
- RHS = SDValue();
- CC = SDValue();
- return true;
- }
- }
- return false;
-}
-
/// Emit a store/load combination to the stack. This stores
/// SrcOp to a stack slot of type SlotVT, truncating it if needed. It then does
/// a load from the stack slot to DestVT, extending it if needed.
@@ -3729,8 +3579,8 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Tmp2 = Node->getOperand(1 + Offset);
Tmp3 = Node->getOperand(2 + Offset);
bool Legalized =
- LegalizeSetCCCondCode(Node->getValueType(0), Tmp1, Tmp2, Tmp3,
- NeedInvert, dl, Chain, IsSignaling);
+ TLI.LegalizeSetCCCondCode(DAG, Node->getValueType(0), Tmp1, Tmp2, Tmp3,
+ NeedInvert, dl, Chain, IsSignaling);
if (Legalized) {
// If we expanded the SETCC by swapping LHS and RHS, or by inverting the
@@ -3825,8 +3675,9 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
}
if (!Legalized) {
- Legalized = LegalizeSetCCCondCode(getSetCCResultType(Tmp1.getValueType()),
- Tmp1, Tmp2, CC, NeedInvert, dl, Chain);
+ Legalized = TLI.LegalizeSetCCCondCode(
+ DAG, getSetCCResultType(Tmp1.getValueType()), Tmp1, Tmp2, CC,
+ NeedInvert, dl, Chain);
assert(Legalized && "Can't legalize SELECT_CC with legal condition!");
@@ -3860,8 +3711,8 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Tmp4 = Node->getOperand(1); // CC
bool Legalized =
- LegalizeSetCCCondCode(getSetCCResultType(Tmp2.getValueType()), Tmp2,
- Tmp3, Tmp4, NeedInvert, dl, Chain);
+ TLI.LegalizeSetCCCondCode(DAG, getSetCCResultType(Tmp2.getValueType()),
+ Tmp2, Tmp3, Tmp4, NeedInvert, dl, Chain);
(void)Legalized;
assert(Legalized && "Can't legalize BR_CC with legal condition!");
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 1cffe20cbb83..8dd8da352734 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -138,6 +138,7 @@ class VectorLegalizer {
SDValue ExpandStore(SDNode *N);
SDValue ExpandFNEG(SDNode *Node);
void ExpandFSUB(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+ void ExpandSETCC(SDNode *Node, SmallVectorImpl<SDValue> &Results);
void ExpandBITREVERSE(SDNode *Node, SmallVectorImpl<SDValue> &Results);
void ExpandUADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
void ExpandSADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
@@ -396,7 +397,6 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::SELECT:
case ISD::VSELECT:
case ISD::SELECT_CC:
- case ISD::SETCC:
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND:
case ISD::TRUNCATE:
@@ -495,6 +495,14 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
Action = TLI.getOperationAction(Node->getOpcode(),
Node->getOperand(1).getValueType());
break;
+ case ISD::SETCC: {
+ MVT OpVT = Node->getOperand(0).getSimpleValueType();
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(2))->get();
+ Action = TLI.getCondCodeAction(CCCode, OpVT);
+ if (Action == TargetLowering::Legal)
+ Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
+ break;
+ }
}
LLVM_DEBUG(dbgs() << "\nLegalizing vector op: "; Node->dump(&DAG));
@@ -762,7 +770,7 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
ExpandFSUB(Node, Results);
return;
case ISD::SETCC:
- Results.push_back(UnrollVSETCC(Node));
+ ExpandSETCC(Node, Results);
return;
case ISD::ABS:
if (TLI.expandABS(Node, Tmp, DAG)) {
@@ -1331,6 +1339,50 @@ void VectorLegalizer::ExpandFSUB(SDNode *Node,
Results.push_back(Tmp);
}
+void VectorLegalizer::ExpandSETCC(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
+ bool NeedInvert = false;
+ SDLoc dl(Node);
+ MVT OpVT = Node->getOperand(0).getSimpleValueType();
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(2))->get();
+
+ if (TLI.getCondCodeAction(CCCode, OpVT) != TargetLowering::Expand) {
+ Results.push_back(UnrollVSETCC(Node));
+ return;
+ }
+
+ SDValue Chain;
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+ SDValue CC = Node->getOperand(2);
+ bool Legalized = TLI.LegalizeSetCCCondCode(DAG, Node->getValueType(0), LHS,
+ RHS, CC, NeedInvert, dl, Chain);
+
+ if (Legalized) {
+ // If we expanded the SETCC by swapping LHS and RHS, or by inverting the
+ // condition code, create a new SETCC node.
+ if (CC.getNode())
+ LHS = DAG.getNode(ISD::SETCC, dl, Node->getValueType(0), LHS, RHS, CC,
+ Node->getFlags());
+
+ // If we expanded the SETCC by inverting the condition code, then wrap
+ // the existing SETCC in a NOT to restore the intended condition.
+ if (NeedInvert)
+ LHS = DAG.getLogicalNOT(dl, LHS, LHS->getValueType(0));
+ } else {
+ // Otherwise, SETCC for the given comparison type must be completely
+ // illegal; expand it into a SELECT_CC.
+ EVT VT = Node->getValueType(0);
+ LHS =
+ DAG.getNode(ISD::SELECT_CC, dl, VT, LHS, RHS,
+ DAG.getBoolConstant(true, dl, VT, LHS.getValueType()),
+ DAG.getBoolConstant(false, dl, VT, LHS.getValueType()), CC);
+ LHS->setFlags(Node->getFlags());
+ }
+
+ Results.push_back(LHS);
+}
+
void VectorLegalizer::ExpandUADDSUBO(SDNode *Node,
SmallVectorImpl<SDValue> &Results) {
SDValue Result, Overflow;
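The SETONE/SETUEQ branch of the expansion this calls into (implemented in
LegalizeSetCCCondCode, moved to TargetLowering.cpp below) leans on an IEEE
identity: ordered comparisons are false whenever either operand is NaN, so
NOT(OGT || OLT) is exactly UEQ. A self-contained C++ check of that identity,
independent of LLVM and purely illustrative:

    #include <cassert>
    #include <cmath>

    // fcmp ueq: equal, or unordered (at least one operand is NaN).
    static bool ueq_reference(float a, float b) {
      return a == b || std::isnan(a) || std::isnan(b);
    }

    // The expansion used when only SETOGT/SETOLT are legal:
    // SETUEQ == NOT (SETOGT OR SETOLT).
    static bool ueq_expanded(float a, float b) {
      return !(a > b || a < b);
    }

    int main() {
      const float vals[] = {0.0f, -0.0f, 1.0f, -1.0f, std::nanf("")};
      for (float a : vals)
        for (float b : vals)
          assert(ueq_expanded(a, b) == ueq_reference(a, b));
      return 0;
    }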
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index b02a65e91ff3..04067ffd2303 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8698,3 +8698,137 @@ SDValue TargetLowering::expandVectorSplice(SDNode *Node,
return DAG.getLoad(VT, DL, StoreV2, StackPtr2,
MachinePointerInfo::getUnknownStack(MF));
}
+
+bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT,
+ SDValue &LHS, SDValue &RHS,
+ SDValue &CC, bool &NeedInvert,
+ const SDLoc &dl, SDValue &Chain,
+ bool IsSignaling) const {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ MVT OpVT = LHS.getSimpleValueType();
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
+ NeedInvert = false;
+ switch (TLI.getCondCodeAction(CCCode, OpVT)) {
+ default:
+ llvm_unreachable("Unknown condition code action!");
+ case TargetLowering::Legal:
+ // Nothing to do.
+ break;
+ case TargetLowering::Expand: {
+ ISD::CondCode InvCC = ISD::getSetCCSwappedOperands(CCCode);
+ if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
+ std::swap(LHS, RHS);
+ CC = DAG.getCondCode(InvCC);
+ return true;
+ }
+ // Swapping operands didn't work. Try inverting the condition.
+ bool NeedSwap = false;
+ InvCC = getSetCCInverse(CCCode, OpVT);
+ if (!TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
+ // If inverting the condition is not enough, try swapping operands
+ // on top of it.
+ InvCC = ISD::getSetCCSwappedOperands(InvCC);
+ NeedSwap = true;
+ }
+ if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
+ CC = DAG.getCondCode(InvCC);
+ NeedInvert = true;
+ if (NeedSwap)
+ std::swap(LHS, RHS);
+ return true;
+ }
+
+ ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
+ unsigned Opc = 0;
+ switch (CCCode) {
+ default:
+ llvm_unreachable("Don't know how to expand this condition!");
+ case ISD::SETUO:
+ if (TLI.isCondCodeLegal(ISD::SETUNE, OpVT)) {
+ CC1 = ISD::SETUNE;
+ CC2 = ISD::SETUNE;
+ Opc = ISD::OR;
+ break;
+ }
+ assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) &&
+ "If SETUE is expanded, SETOEQ or SETUNE must be legal!");
+ NeedInvert = true;
+ LLVM_FALLTHROUGH;
+ case ISD::SETO:
+ assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) &&
+ "If SETO is expanded, SETOEQ must be legal!");
+ CC1 = ISD::SETOEQ;
+ CC2 = ISD::SETOEQ;
+ Opc = ISD::AND;
+ break;
+ case ISD::SETONE:
+ case ISD::SETUEQ:
+ // If the SETUO or SETO CC isn't legal, we might be able to use
+ // SETOGT || SETOLT, inverting the result for SETUEQ. We only need one
+ // of SETOGT/SETOLT to be legal, the other can be emulated by swapping
+ // the operands.
+ CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
+ if (!TLI.isCondCodeLegal(CC2, OpVT) &&
+ (TLI.isCondCodeLegal(ISD::SETOGT, OpVT) ||
+ TLI.isCondCodeLegal(ISD::SETOLT, OpVT))) {
+ CC1 = ISD::SETOGT;
+ CC2 = ISD::SETOLT;
+ Opc = ISD::OR;
+ NeedInvert = ((unsigned)CCCode & 0x8U);
+ break;
+ }
+ LLVM_FALLTHROUGH;
+ case ISD::SETOEQ:
+ case ISD::SETOGT:
+ case ISD::SETOGE:
+ case ISD::SETOLT:
+ case ISD::SETOLE:
+ case ISD::SETUNE:
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ case ISD::SETULT:
+ case ISD::SETULE:
+ // If we are floating point, assign and break; otherwise fall through.
+ if (!OpVT.isInteger()) {
+ // We can use the 4th bit to tell if we are the unordered
+ // or ordered version of the opcode.
+ CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
+ Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
+ CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
+ break;
+ }
+ // Fallthrough if we are unsigned integer.
+ LLVM_FALLTHROUGH;
+ case ISD::SETLE:
+ case ISD::SETGT:
+ case ISD::SETGE:
+ case ISD::SETLT:
+ case ISD::SETNE:
+ case ISD::SETEQ:
+ // If all combinations of inverting the condition and swapping operands
+ // didn't work then we have no means to expand the condition.
+ llvm_unreachable("Don't know how to expand this condition!");
+ }
+
+ SDValue SetCC1, SetCC2;
+ if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
+ // If we aren't the ordered or unordered operation,
+ // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
+ SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling);
+ SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling);
+ } else {
+ // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
+ SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling);
+ SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling);
+ }
+ if (Chain)
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1),
+ SetCC2.getValue(1));
+ LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
+ RHS = SDValue();
+ CC = SDValue();
+ return true;
+ }
+ }
+ return false;
+}
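The bit tricks in this function depend on the value layout of ISD::CondCode:
bit 3 (0x8) marks the unordered floating-point codes, and masking to the low
three bits while setting bit 4 (0x10) yields the matching "don't care about
ordering" integer-style code used as CC1. A standalone sketch of that layout
(the enum values are mirrored by hand from ISDOpcodes.h and should be checked
against the real header):

    #include <cassert>

    // Mirrors the value layout of llvm::ISD::CondCode.
    enum CondCode {
      SETFALSE,  SETOEQ, SETOGT, SETOGE, SETOLT, SETOLE, SETONE, SETO,
      SETUO,     SETUEQ, SETUGT, SETUGE, SETULT, SETULE, SETUNE, SETTRUE,
      SETFALSE2, SETEQ,  SETGT,  SETGE,  SETLT,  SETLE,  SETNE,  SETTRUE2
    };

    int main() {
      // Bit 3 distinguishes unordered from ordered FP comparisons.
      assert((SETUGT & 0x8) != 0 && (SETOGT & 0x8) == 0);
      // ((CC & 0x7) | 0x10) maps a floating-point code to its integer
      // counterpart, e.g. SETUGT -> SETGT; this is how CC1 is computed.
      assert(((SETUGT & 0x7) | 0x10) == SETGT);
      assert(((SETOLE & 0x7) | 0x10) == SETLE);
      return 0;
    }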
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 6497d8e9f052..000cbf856c62 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1388,6 +1388,20 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
// We use EXTRACT_SUBVECTOR to "cast" a scalable vector to a fixed length one.
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
+ if (VT.isFloatingPoint()) {
+ setCondCodeAction(ISD::SETO, VT, Expand);
+ setCondCodeAction(ISD::SETOLT, VT, Expand);
+ setCondCodeAction(ISD::SETLT, VT, Expand);
+ setCondCodeAction(ISD::SETOLE, VT, Expand);
+ setCondCodeAction(ISD::SETLE, VT, Expand);
+ setCondCodeAction(ISD::SETULT, VT, Expand);
+ setCondCodeAction(ISD::SETULE, VT, Expand);
+ setCondCodeAction(ISD::SETUGE, VT, Expand);
+ setCondCodeAction(ISD::SETUGT, VT, Expand);
+ setCondCodeAction(ISD::SETUEQ, VT, Expand);
+ setCondCodeAction(ISD::SETUNE, VT, Expand);
+ }
+
// Lower fixed length vector operations to scalable equivalents.
setOperationAction(ISD::ABS, VT, Custom);
setOperationAction(ISD::ADD, VT, Custom);
@@ -10389,11 +10403,8 @@ static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS,
SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op,
SelectionDAG &DAG) const {
- if (Op.getValueType().isScalableVector()) {
- if (Op.getOperand(0).getValueType().isFloatingPoint())
- return Op;
+ if (Op.getValueType().isScalableVector())
return LowerToPredicatedOp(Op, DAG, AArch64ISD::SETCC_MERGE_ZERO);
- }
if (useSVEForFixedLengthVectorVT(Op.getOperand(0).getValueType()))
return LowerFixedLengthVectorSetccToSVE(Op, DAG);
@@ -17455,10 +17466,6 @@ SDValue AArch64TargetLowering::LowerFixedLengthVectorSetccToSVE(
assert(Op.getValueType() == InVT.changeTypeToInteger() &&
"Expected integer result of the same bit length as the inputs!");
- // Expand floating point vector comparisons.
- if (InVT.isFloatingPoint())
- return SDValue();
-
auto Op1 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(0));
auto Op2 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(1));
auto Pg = getPredicateForFixedLengthVector(DAG, DL, InVT);
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 63a53cc0c8f1..d30bfe45335d 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -262,18 +262,6 @@ def index_vector : SDNode<"AArch64ISD::INDEX_VECTOR", SDT_IndexVector, []>;
def reinterpret_cast : SDNode<"AArch64ISD::REINTERPRET_CAST", SDTUnaryOp>;
-def setoge_or_setge : PatFrags<(ops node:$lhs, node:$rhs),
- [(setoge node:$lhs, node:$rhs),
- (setge node:$lhs, node:$rhs)]>;
-def setogt_or_setgt : PatFrags<(ops node:$lhs, node:$rhs),
- [(setogt node:$lhs, node:$rhs),
- (setgt node:$lhs, node:$rhs)]>;
-def setoeq_or_seteq : PatFrags<(ops node:$lhs, node:$rhs),
- [(setoeq node:$lhs, node:$rhs),
- (seteq node:$lhs, node:$rhs)]>;
-def setone_or_setne : PatFrags<(ops node:$lhs, node:$rhs),
- [(setone node:$lhs, node:$rhs),
- (setne node:$lhs, node:$rhs)]>;
def AArch64mul_p_oneuse : PatFrag<(ops node:$pred, node:$src1, node:$src2),
(AArch64mul_p node:$pred, node:$src1, node:$src2), [{
return N->hasOneUse();
@@ -1252,11 +1240,11 @@ let Predicates = [HasSVE] in {
defm CMPLO_PPzZI : sve_int_ucmp_vi<0b10, "cmplo", SETULT, SETUGT>;
defm CMPLS_PPzZI : sve_int_ucmp_vi<0b11, "cmpls", SETULE, SETUGE>;
- defm FCMGE_PPzZZ : sve_fp_3op_p_pd_cc<0b000, "fcmge", int_aarch64_sve_fcmpge, setoge_or_setge>;
- defm FCMGT_PPzZZ : sve_fp_3op_p_pd_cc<0b001, "fcmgt", int_aarch64_sve_fcmpgt, setogt_or_setgt>;
- defm FCMEQ_PPzZZ : sve_fp_3op_p_pd_cc<0b010, "fcmeq", int_aarch64_sve_fcmpeq, setoeq_or_seteq>;
- defm FCMNE_PPzZZ : sve_fp_3op_p_pd_cc<0b011, "fcmne", int_aarch64_sve_fcmpne, setone_or_setne>;
- defm FCMUO_PPzZZ : sve_fp_3op_p_pd_cc<0b100, "fcmuo", int_aarch64_sve_fcmpuo, setuo>;
+ defm FCMGE_PPzZZ : sve_fp_3op_p_pd_cc<0b000, "fcmge", int_aarch64_sve_fcmpge, SETOGE, SETGE, SETOLE, SETLE>;
+ defm FCMGT_PPzZZ : sve_fp_3op_p_pd_cc<0b001, "fcmgt", int_aarch64_sve_fcmpgt, SETOGT, SETGT, SETOLT, SETLT>;
+ defm FCMEQ_PPzZZ : sve_fp_3op_p_pd_cc<0b010, "fcmeq", int_aarch64_sve_fcmpeq, SETOEQ, SETEQ, SETOEQ, SETEQ>;
+ defm FCMNE_PPzZZ : sve_fp_3op_p_pd_cc<0b011, "fcmne", int_aarch64_sve_fcmpne, SETONE, SETNE, SETONE, SETNE>;
+ defm FCMUO_PPzZZ : sve_fp_3op_p_pd_cc<0b100, "fcmuo", int_aarch64_sve_fcmpuo, SETUO, SETUO, SETUO, SETUO>;
defm FACGE_PPzZZ : sve_fp_3op_p_pd<0b101, "facge", int_aarch64_sve_facge>;
defm FACGT_PPzZZ : sve_fp_3op_p_pd<0b111, "facgt", int_aarch64_sve_facgt>;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 178c83b98599..ebeeea639c9c 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -4685,20 +4685,22 @@ multiclass sve_fp_3op_p_pd<bits<3> opc, string asm, SDPatternOperator op> {
}
multiclass sve_fp_3op_p_pd_cc<bits<3> opc, string asm, SDPatternOperator op,
- SDPatternOperator op_nopred>
+ CondCode cc1, CondCode cc2,
+ CondCode invcc1, CondCode invcc2>
: sve_fp_3op_p_pd<opc, asm, op> {
- def : SVE_2_Op_AllActive_Pat<nxv8i1, op_nopred, nxv8f16, nxv8f16,
- !cast<Instruction>(NAME # _H), PTRUE_H>;
- def : SVE_2_Op_AllActive_Pat<nxv4i1, op_nopred, nxv4f16, nxv4f16,
- !cast<Instruction>(NAME # _H), PTRUE_S>;
- def : SVE_2_Op_AllActive_Pat<nxv2i1, op_nopred, nxv2f16, nxv2f16,
- !cast<Instruction>(NAME # _H), PTRUE_D>;
- def : SVE_2_Op_AllActive_Pat<nxv4i1, op_nopred, nxv4f32, nxv4f32,
- !cast<Instruction>(NAME # _S), PTRUE_S>;
- def : SVE_2_Op_AllActive_Pat<nxv2i1, op_nopred, nxv2f32, nxv2f32,
- !cast<Instruction>(NAME # _S), PTRUE_D>;
- def : SVE_2_Op_AllActive_Pat<nxv2i1, op_nopred, nxv2f64, nxv2f64,
- !cast<Instruction>(NAME # _D), PTRUE_D>;
+ defm : SVE_SETCC_Pat<cc1, invcc1, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
+ defm : SVE_SETCC_Pat<cc1, invcc1, nxv4i1, nxv4f16, !cast<Instruction>(NAME # _H)>;
+ defm : SVE_SETCC_Pat<cc1, invcc1, nxv2i1, nxv2f16, !cast<Instruction>(NAME # _H)>;
+ defm : SVE_SETCC_Pat<cc1, invcc1, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
+ defm : SVE_SETCC_Pat<cc1, invcc1, nxv2i1, nxv2f32, !cast<Instruction>(NAME # _S)>;
+ defm : SVE_SETCC_Pat<cc1, invcc1, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
+
+ defm : SVE_SETCC_Pat<cc2, invcc2, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
+ defm : SVE_SETCC_Pat<cc2, invcc2, nxv4i1, nxv4f16, !cast<Instruction>(NAME # _H)>;
+ defm : SVE_SETCC_Pat<cc2, invcc2, nxv2i1, nxv2f16, !cast<Instruction>(NAME # _H)>;
+ defm : SVE_SETCC_Pat<cc2, invcc2, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
+ defm : SVE_SETCC_Pat<cc2, invcc2, nxv2i1, nxv2f32, !cast<Instruction>(NAME # _S)>;
+ defm : SVE_SETCC_Pat<cc2, invcc2, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-float-compares.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-float-compares.ll
new file mode 100644
index 000000000000..baf01768d55c
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-float-compares.ll
@@ -0,0 +1,762 @@
+; RUN: llc -aarch64-sve-vector-bits-min=128 -asm-verbose=0 < %s | FileCheck %s -check-prefix=NO_SVE
+; RUN: llc -aarch64-sve-vector-bits-min=256 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_EQ_256
+; RUN: llc -aarch64-sve-vector-bits-min=384 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK
+; RUN: llc -aarch64-sve-vector-bits-min=512 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
+; RUN: llc -aarch64-sve-vector-bits-min=640 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
+; RUN: llc -aarch64-sve-vector-bits-min=768 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
+; RUN: llc -aarch64-sve-vector-bits-min=896 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
+; RUN: llc -aarch64-sve-vector-bits-min=1024 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
+; RUN: llc -aarch64-sve-vector-bits-min=1152 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
+; RUN: llc -aarch64-sve-vector-bits-min=1280 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
+; RUN: llc -aarch64-sve-vector-bits-min=1408 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
+; RUN: llc -aarch64-sve-vector-bits-min=1536 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
+; RUN: llc -aarch64-sve-vector-bits-min=1664 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
+; RUN: llc -aarch64-sve-vector-bits-min=1792 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
+; RUN: llc -aarch64-sve-vector-bits-min=1920 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
+; RUN: llc -aarch64-sve-vector-bits-min=2048 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024,VBITS_GE_2048
+
+target triple = "aarch64-unknown-linux-gnu"
+
+; Don't use SVE when its registers are no bigger than NEON.
+; NO_SVE-NOT: z{{[0-9]}}
+
+;
+; FCMP OEQ
+;
+
+; Don't use SVE for 64-bit vectors.
+define <4 x i16> @fcmp_oeq_v4f16(<4 x half> %op1, <4 x half> %op2) #0 {
+; CHECK-LABEL: fcmp_oeq_v4f16:
+; CHECK: fcmeq v0.4h, v0.4h, v1.4h
+; CHECK-NEXT: ret
+ %cmp = fcmp oeq <4 x half> %op1, %op2
+ %sext = sext <4 x i1> %cmp to <4 x i16>
+ ret <4 x i16> %sext
+}
+
+; Don't use SVE for 128-bit vectors.
+define <8 x i16> @fcmp_oeq_v8f16(<8 x half> %op1, <8 x half> %op2) #0 {
+; CHECK-LABEL: fcmp_oeq_v8f16:
+; CHECK: fcmeq v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: ret
+ %cmp = fcmp oeq <8 x half> %op1, %op2
+ %sext = sext <8 x i1> %cmp to <8 x i16>
+ ret <8 x i16> %sext
+}
+
+define void @fcmp_oeq_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
+; CHECK-LABEL: fcmp_oeq_v16f16:
+; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
+; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
+; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
+; CHECK-NEXT: fcmeq [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
+; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
+; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
+; CHECK-NEXT: ret
+ %op1 = load <16 x half>, <16 x half>* %a
+ %op2 = load <16 x half>, <16 x half>* %b
+ %cmp = fcmp oeq <16 x half> %op1, %op2
+ %sext = sext <16 x i1> %cmp to <16 x i16>
+ store <16 x i16> %sext, <16 x i16>* %c
+ ret void
+}
+
+define void @fcmp_oeq_v32f16(<32 x half>* %a, <32 x half>* %b, <32 x i16>* %c) #0 {
+; CHECK-LABEL: fcmp_oeq_v32f16:
+; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32
+; VBITS_GE_512-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
+; VBITS_GE_512-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
+; VBITS_GE_512-NEXT: fcmeq [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
+; VBITS_GE_512-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
+; VBITS_GE_512-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
+; VBITS_GE_512-NEXT: ret
+
+; Ensure sensible type legalisation
+; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
+; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
+; VBITS_EQ_256-DAG: add x[[C_HI:[0-9]+]], x2, #32
+; VBITS_EQ_256-DAG: ld1h { [[OP1_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
+; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1h { [[OP2_LO:z[0-9]+]].h }, [[PG]]/z, [x1]
+; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x[[B_HI]]]
+; VBITS_EQ_256-DAG: fcmeq [[CMP_HI:p[0-9]+]].h, [[PG]]/z, [[OP1_HI]].h, [[OP2_HI]].h
+; VBITS_EQ_256-DAG: mov [[SEXT_HI:z[0-9]+]].h, [[CMP_HI]]/z, #-1
+; VBITS_EQ_256-DAG: fcmeq [[CMP_LO:p[0-9]+]].h, [[PG]]/z, [[OP1_LO]].h, [[OP2_LO]].h
+; VBITS_EQ_256-DAG: mov [[SEXT_LO:z[0-9]+]].h, [[CMP_LO]]/z, #-1
+; VBITS_EQ_256-DAG: st1h { [[SEXT_LO]].h }, [[PG]], [x2]
+; VBITS_EQ_256-DAG: st1h { [[SEXT_HI]].h }, [[PG]], [x[[C_HI]]]
+; VBITS_EQ_256-NEXT: ret
+ %op1 = load <32 x half>, <32 x half>* %a
+ %op2 = load <32 x half>, <32 x half>* %b
+ %cmp = fcmp oeq <32 x half> %op1, %op2
+ %sext = sext <32 x i1> %cmp to <32 x i16>
+ store <32 x i16> %sext, <32 x i16>* %c
+ ret void
+}
+
+define void @fcmp_oeq_v64f16(<64 x half>* %a, <64 x half>* %b, <64 x i16>* %c) #0 {
+; CHECK-LABEL: fcmp_oeq_v64f16:
+; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64
+; VBITS_GE_1024-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
+; VBITS_GE_1024-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
+; VBITS_GE_1024-NEXT: fcmeq [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
+; VBITS_GE_1024-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
+; VBITS_GE_1024-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
+; VBITS_GE_1024-NEXT: ret
+ %op1 = load <64 x half>, <64 x half>* %a
+ %op2 = load <64 x half>, <64 x half>* %b
+ %cmp = fcmp oeq <64 x half> %op1, %op2
+ %sext = sext <64 x i1> %cmp to <64 x i16>
+ store <64 x i16> %sext, <64 x i16>* %c
+ ret void
+}
+
+define void @fcmp_oeq_v128f16(<128 x half>* %a, <128 x half>* %b, <128 x i16>* %c) #0 {
+; CHECK-LABEL: fcmp_oeq_v128f16:
+; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].h, vl128
+; VBITS_GE_2048-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
+; VBITS_GE_2048-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
+; VBITS_GE_2048-NEXT: fcmeq [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
+; VBITS_GE_2048-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
+; VBITS_GE_2048-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
+; VBITS_GE_2048-NEXT: ret
+ %op1 = load <128 x half>, <128 x half>* %a
+ %op2 = load <128 x half>, <128 x half>* %b
+ %cmp = fcmp oeq <128 x half> %op1, %op2
+ %sext = sext <128 x i1> %cmp to <128 x i16>
+ store <128 x i16> %sext, <128 x i16>* %c
+ ret void
+}
+
+; Don't use SVE for 64-bit vectors.
+define <2 x i32> @fcmp_oeq_v2f32(<2 x float> %op1, <2 x float> %op2) #0 {
+; CHECK-LABEL: fcmp_oeq_v2f32:
+; CHECK: fcmeq v0.2s, v0.2s, v1.2s
+; CHECK-NEXT: ret
+ %cmp = fcmp oeq <2 x float> %op1, %op2
+ %sext = sext <2 x i1> %cmp to <2 x i32>
+ ret <2 x i32> %sext
+}
+
+; Don't use SVE for 128-bit vectors.
+define <4 x i32> @fcmp_oeq_v4f32(<4 x float> %op1, <4 x float> %op2) #0 {
+; CHECK-LABEL: fcmp_oeq_v4f32:
+; CHECK: fcmeq v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: ret
+ %cmp = fcmp oeq <4 x float> %op1, %op2
+ %sext = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %sext
+}
+
+define void @fcmp_oeq_v8f32(<8 x float>* %a, <8 x float>* %b, <8 x i32>* %c) #0 {
+; CHECK-LABEL: fcmp_oeq_v8f32:
+; CHECK: ptrue [[PG:p[0-9]+]].s, vl8
+; CHECK-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0]
+; CHECK-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1]
+; CHECK-NEXT: fcmeq [[CMP:p[0-9]+]].s, [[PG]]/z, [[OP1]].s, [[OP2]].s
+; CHECK-NEXT: mov [[SEXT:z[0-9]+]].s, [[CMP]]/z, #-1
+; CHECK-NEXT: st1w { [[SEXT]].s }, [[PG]], [x2]
+; CHECK-NEXT: ret
+ %op1 = load <8 x float>, <8 x float>* %a
+ %op2 = load <8 x float>, <8 x float>* %b
+ %cmp = fcmp oeq <8 x float> %op1, %op2
+ %sext = sext <8 x i1> %cmp to <8 x i32>
+ store <8 x i32> %sext, <8 x i32>* %c
+ ret void
+}
+
+define void @fcmp_oeq_v16f32(<16 x float>* %a, <16 x float>* %b, <16 x i32>* %c) #0 {
+; CHECK-LABEL: fcmp_oeq_v16f32:
+; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16
+; VBITS_GE_512-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0]
+; VBITS_GE_512-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1]
+; VBITS_GE_512-NEXT: fcmeq [[CMP:p[0-9]+]].s, [[PG]]/z, [[OP1]].s, [[OP2]].s
+; VBITS_GE_512-NEXT: mov [[SEXT:z[0-9]+]].s, [[CMP]]/z, #-1
+; VBITS_GE_512-NEXT: st1w { [[SEXT]].s }, [[PG]], [x2]
+; VBITS_GE_512-NEXT: ret
+
+; Ensure sensible type legalisation
+; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
+; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
+; VBITS_EQ_256-DAG: add x[[C_HI:[0-9]+]], x2, #32
+; VBITS_EQ_256-DAG: ld1w { [[OP1_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
+; VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1w { [[OP2_LO:z[0-9]+]].s }, [[PG]]/z, [x1]
+; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x[[B_HI]]]
+; VBITS_EQ_256-DAG: fcmeq [[CMP_HI:p[0-9]+]].s, [[PG]]/z, [[OP1_HI]].s, [[OP2_HI]].s
+; VBITS_EQ_256-DAG: mov [[SEXT_HI:z[0-9]+]].s, [[CMP_HI]]/z, #-1
+; VBITS_EQ_256-DAG: fcmeq [[CMP_LO:p[0-9]+]].s, [[PG]]/z, [[OP1_LO]].s, [[OP2_LO]].s
+; VBITS_EQ_256-DAG: mov [[SEXT_LO:z[0-9]+]].s, [[CMP_LO]]/z, #-1
+; VBITS_EQ_256-DAG: st1w { [[SEXT_LO]].s }, [[PG]], [x2]
+; VBITS_EQ_256-DAG: st1w { [[SEXT_HI]].s }, [[PG]], [x[[C_HI]]]
+; VBITS_EQ_256-NEXT: ret
+ %op1 = load <16 x float>, <16 x float>* %a
+ %op2 = load <16 x float>, <16 x float>* %b
+ %cmp = fcmp oeq <16 x float> %op1, %op2
+ %sext = sext <16 x i1> %cmp to <16 x i32>
+ store <16 x i32> %sext, <16 x i32>* %c
+ ret void
+}
+
+define void @fcmp_oeq_v32f32(<32 x float>* %a, <32 x float>* %b, <32 x i32>* %c) #0 {
+; CHECK-LABEL: fcmp_oeq_v32f32:
+; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32
+; VBITS_GE_1024-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0]
+; VBITS_GE_1024-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1]
+; VBITS_GE_1024-NEXT: fcmeq [[CMP:p[0-9]+]].s, [[PG]]/z, [[OP1]].s, [[OP2]].s
+; VBITS_GE_1024-NEXT: mov [[SEXT:z[0-9]+]].s, [[CMP]]/z, #-1
+; VBITS_GE_1024-NEXT: st1w { [[SEXT]].s }, [[PG]], [x2]
+; VBITS_GE_1024-NEXT: ret
+ %op1 = load <32 x float>, <32 x float>* %a
+ %op2 = load <32 x float>, <32 x float>* %b
+ %cmp = fcmp oeq <32 x float> %op1, %op2
+ %sext = sext <32 x i1> %cmp to <32 x i32>
+ store <32 x i32> %sext, <32 x i32>* %c
+ ret void
+}
+
+define void @fcmp_oeq_v64f32(<64 x float>* %a, <64 x float>* %b, <64 x i32>* %c) #0 {
+; CHECK-LABEL: fcmp_oeq_v64f32:
+; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64
+; VBITS_GE_2048-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0]
+; VBITS_GE_2048-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1]
+; VBITS_GE_2048-NEXT: fcmeq [[CMP:p[0-9]+]].s, [[PG]]/z, [[OP1]].s, [[OP2]].s
+; VBITS_GE_2048-NEXT: mov [[SEXT:z[0-9]+]].s, [[CMP]]/z, #-1
+; VBITS_GE_2048-NEXT: st1w { [[SEXT]].s }, [[PG]], [x2]
+; VBITS_GE_2048-NEXT: ret
+ %op1 = load <64 x float>, <64 x float>* %a
+ %op2 = load <64 x float>, <64 x float>* %b
+ %cmp = fcmp oeq <64 x float> %op1, %op2
+ %sext = sext <64 x i1> %cmp to <64 x i32>
+ store <64 x i32> %sext, <64 x i32>* %c
+ ret void
+}
+
+; Don't use SVE for 64-bit vectors.
+define <1 x i64> @fcmp_oeq_v1f64(<1 x double> %op1, <1 x double> %op2) #0 {
+; CHECK-LABEL: fcmp_oeq_v1f64:
+; CHECK: fcmeq d0, d0, d1
+; CHECK-NEXT: ret
+ %cmp = fcmp oeq <1 x double> %op1, %op2
+ %sext = sext <1 x i1> %cmp to <1 x i64>
+ ret <1 x i64> %sext
+}
+
+; Don't use SVE for 128-bit vectors.
+define <2 x i64> @fcmp_oeq_v2f64(<2 x double> %op1, <2 x double> %op2) #0 {
+; CHECK-LABEL: fcmp_oeq_v2f64:
+; CHECK: fcmeq v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: ret
+ %cmp = fcmp oeq <2 x double> %op1, %op2
+ %sext = sext <2 x i1> %cmp to <2 x i64>
+ ret <2 x i64> %sext
+}
+
+define void @fcmp_oeq_v4f64(<4 x double>* %a, <4 x double>* %b, <4 x i64>* %c) #0 {
+; CHECK-LABEL: fcmp_oeq_v4f64:
+; CHECK: ptrue [[PG:p[0-9]+]].d, vl4
+; CHECK-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0]
+; CHECK-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1]
+; CHECK-NEXT: fcmeq [[CMP:p[0-9]+]].d, [[PG]]/z, [[OP1]].d, [[OP2]].d
+; CHECK-NEXT: mov [[SEXT:z[0-9]+]].d, [[CMP]]/z, #-1
+; CHECK-NEXT: st1d { [[SEXT]].d }, [[PG]], [x2]
+; CHECK-NEXT: ret
+ %op1 = load <4 x double>, <4 x double>* %a
+ %op2 = load <4 x double>, <4 x double>* %b
+ %cmp = fcmp oeq <4 x double> %op1, %op2
+ %sext = sext <4 x i1> %cmp to <4 x i64>
+ store <4 x i64> %sext, <4 x i64>* %c
+ ret void
+}
+
+define void @fcmp_oeq_v8f64(<8 x double>* %a, <8 x double>* %b, <8 x i64>* %c) #0 {
+; CHECK-LABEL: fcmp_oeq_v8f64:
+; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8
+; VBITS_GE_512-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0]
+; VBITS_GE_512-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1]
+; VBITS_GE_512-NEXT: fcmeq [[CMP:p[0-9]+]].d, [[PG]]/z, [[OP1]].d, [[OP2]].d
+; VBITS_GE_512-NEXT: mov [[SEXT:z[0-9]+]].d, [[CMP]]/z, #-1
+; VBITS_GE_512-NEXT: st1d { [[SEXT]].d }, [[PG]], [x2]
+; VBITS_GE_512-NEXT: ret
+
+; Ensure sensible type legalisation
+; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
+; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
+; VBITS_EQ_256-DAG: add x[[C_HI:[0-9]+]], x2, #32
+; VBITS_EQ_256-DAG: ld1d { [[OP1_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
+; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1d { [[OP2_LO:z[0-9]+]].d }, [[PG]]/z, [x1]
+; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x[[B_HI]]]
+; VBITS_EQ_256-DAG: fcmeq [[CMP_HI:p[0-9]+]].d, [[PG]]/z, [[OP1_HI]].d, [[OP2_HI]].d
+; VBITS_EQ_256-DAG: mov [[SEXT_HI:z[0-9]+]].d, [[CMP_HI]]/z, #-1
+; VBITS_EQ_256-DAG: fcmeq [[CMP_LO:p[0-9]+]].d, [[PG]]/z, [[OP1_LO]].d, [[OP2_LO]].d
+; VBITS_EQ_256-DAG: mov [[SEXT_LO:z[0-9]+]].d, [[CMP_LO]]/z, #-1
+; VBITS_EQ_256-DAG: st1d { [[SEXT_LO]].d }, [[PG]], [x2]
+; VBITS_EQ_256-DAG: st1d { [[SEXT_HI]].d }, [[PG]], [x[[C_HI]]]
+; VBITS_EQ_256-NEXT: ret
+ %op1 = load <8 x double>, <8 x double>* %a
+ %op2 = load <8 x double>, <8 x double>* %b
+ %cmp = fcmp oeq <8 x double> %op1, %op2
+ %sext = sext <8 x i1> %cmp to <8 x i64>
+ store <8 x i64> %sext, <8 x i64>* %c
+ ret void
+}
+
+define void @fcmp_oeq_v16f64(<16 x double>* %a, <16 x double>* %b, <16 x i64>* %c) #0 {
+; CHECK-LABEL: fcmp_oeq_v16f64:
+; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16
+; VBITS_GE_1024-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0]
+; VBITS_GE_1024-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1]
+; VBITS_GE_1024-NEXT: fcmeq [[CMP:p[0-9]+]].d, [[PG]]/z, [[OP1]].d, [[OP2]].d
+; VBITS_GE_1024-NEXT: mov [[SEXT:z[0-9]+]].d, [[CMP]]/z, #-1
+; VBITS_GE_1024-NEXT: st1d { [[SEXT]].d }, [[PG]], [x2]
+; VBITS_GE_1024-NEXT: ret
+ %op1 = load <16 x double>, <16 x double>* %a
+ %op2 = load <16 x double>, <16 x double>* %b
+ %cmp = fcmp oeq <16 x double> %op1, %op2
+ %sext = sext <16 x i1> %cmp to <16 x i64>
+ store <16 x i64> %sext, <16 x i64>* %c
+ ret void
+}
+
+define void @fcmp_oeq_v32f64(<32 x double>* %a, <32 x double>* %b, <32 x i64>* %c) #0 {
+; CHECK-LABEL: fcmp_oeq_v32f64:
+; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32
+; VBITS_GE_2048-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0]
+; VBITS_GE_2048-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1]
+; VBITS_GE_2048-NEXT: fcmeq [[CMP:p[0-9]+]].d, [[PG]]/z, [[OP1]].d, [[OP2]].d
+; VBITS_GE_2048-NEXT: mov [[SEXT:z[0-9]+]].d, [[CMP]]/z, #-1
+; VBITS_GE_2048-NEXT: st1d { [[SEXT]].d }, [[PG]], [x2]
+; VBITS_GE_2048-NEXT: ret
+ %op1 = load <32 x double>, <32 x double>* %a
+ %op2 = load <32 x double>, <32 x double>* %b
+ %cmp = fcmp oeq <32 x double> %op1, %op2
+ %sext = sext <32 x i1> %cmp to <32 x i64>
+ store <32 x i64> %sext, <32 x i64>* %c
+ ret void
+}
+
+;
+; FCMP UEQ
+;
+
+define void @fcmp_ueq_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
+; CHECK-LABEL: fcmp_ueq_v16f16:
+; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
+; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
+; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
+; CHECK-NEXT: mov [[INV:w[0-9]+]], #65535
+; CHECK-NEXT: fcmne [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
+; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
+; CHECK-NEXT: mov [[TMP:z[0-9]+]].h, [[INV]]
+; CHECK-NEXT: eor [[SEXT]].d, [[SEXT]].d, [[TMP]].d
+; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
+; CHECK-NEXT: ret
+ %op1 = load <16 x half>, <16 x half>* %a
+ %op2 = load <16 x half>, <16 x half>* %b
+ %cmp = fcmp ueq <16 x half> %op1, %op2
+ %sext = sext <16 x i1> %cmp to <16 x i16>
+ store <16 x i16> %sext, <16 x i16>* %c
+ ret void
+}
+
+;
+; FCMP ONE
+;
+
+define void @fcmp_one_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
+; CHECK-LABEL: fcmp_one_v16f16:
+; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
+; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
+; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
+; CHECK-NEXT: fcmne [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
+; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
+; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
+; CHECK-NEXT: ret
+ %op1 = load <16 x half>, <16 x half>* %a
+ %op2 = load <16 x half>, <16 x half>* %b
+ %cmp = fcmp one <16 x half> %op1, %op2
+ %sext = sext <16 x i1> %cmp to <16 x i16>
+ store <16 x i16> %sext, <16 x i16>* %c
+ ret void
+}
+
+;
+; FCMP UNE
+;
+
+define void @fcmp_une_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
+; CHECK-LABEL: fcmp_une_v16f16:
+; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
+; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
+; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
+; CHECK-NEXT: mov [[INV:w[0-9]+]], #65535
+; CHECK-NEXT: fcmeq [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
+; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
+; CHECK-NEXT: mov [[TMP:z[0-9]+]].h, [[INV]]
+; CHECK-NEXT: eor [[SEXT]].d, [[SEXT]].d, [[TMP]].d
+; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
+; CHECK-NEXT: ret
+ %op1 = load <16 x half>, <16 x half>* %a
+ %op2 = load <16 x half>, <16 x half>* %b
+ %cmp = fcmp une <16 x half> %op1, %op2
+ %sext = sext <16 x i1> %cmp to <16 x i16>
+ store <16 x i16> %sext, <16 x i16>* %c
+ ret void
+}
+
+;
+; FCMP OGT
+;
+
+define void @fcmp_ogt_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
+; CHECK-LABEL: fcmp_ogt_v16f16:
+; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
+; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
+; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
+; CHECK-NEXT: fcmgt [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
+; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
+; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
+; CHECK-NEXT: ret
+ %op1 = load <16 x half>, <16 x half>* %a
+ %op2 = load <16 x half>, <16 x half>* %b
+ %cmp = fcmp ogt <16 x half> %op1, %op2
+ %sext = sext <16 x i1> %cmp to <16 x i16>
+ store <16 x i16> %sext, <16 x i16>* %c
+ ret void
+}
+
+;
+; FCMP UGT
+;
+
+define void @fcmp_ugt_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
+; CHECK-LABEL: fcmp_ugt_v16f16:
+; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
+; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
+; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
+; CHECK-NEXT: mov [[INV:w[0-9]+]], #65535
+; CHECK-NEXT: fcmge [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP2]].h, [[OP1]].h
+; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
+; CHECK-NEXT: mov [[TMP:z[0-9]+]].h, [[INV]]
+; CHECK-NEXT: eor [[SEXT]].d, [[SEXT]].d, [[TMP]].d
+; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
+; CHECK-NEXT: ret
+ %op1 = load <16 x half>, <16 x half>* %a
+ %op2 = load <16 x half>, <16 x half>* %b
+ %cmp = fcmp ugt <16 x half> %op1, %op2
+ %sext = sext <16 x i1> %cmp to <16 x i16>
+ store <16 x i16> %sext, <16 x i16>* %c
+ ret void
+}
+
+;
+; FCMP OLT
+;
+
+define void @fcmp_olt_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
+; CHECK-LABEL: fcmp_olt_v16f16:
+; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
+; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
+; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
+; CHECK-NEXT: fcmgt [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP2]].h, [[OP1]].h
+; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
+; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
+; CHECK-NEXT: ret
+ %op1 = load <16 x half>, <16 x half>* %a
+ %op2 = load <16 x half>, <16 x half>* %b
+ %cmp = fcmp olt <16 x half> %op1, %op2
+ %sext = sext <16 x i1> %cmp to <16 x i16>
+ store <16 x i16> %sext, <16 x i16>* %c
+ ret void
+}
+
+;
+; FCMP ULT
+;
+
+define void @fcmp_ult_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
+; CHECK-LABEL: fcmp_ult_v16f16:
+; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
+; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
+; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
+; CHECK-NEXT: mov [[INV:w[0-9]+]], #65535
+; CHECK-NEXT: fcmge [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
+; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
+; CHECK-NEXT: mov [[TMP:z[0-9]+]].h, [[INV]]
+; CHECK-NEXT: eor [[SEXT]].d, [[SEXT]].d, [[TMP]].d
+; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
+; CHECK-NEXT: ret
+ %op1 = load <16 x half>, <16 x half>* %a
+ %op2 = load <16 x half>, <16 x half>* %b
+ %cmp = fcmp ult <16 x half> %op1, %op2
+ %sext = sext <16 x i1> %cmp to <16 x i16>
+ store <16 x i16> %sext, <16 x i16>* %c
+ ret void
+}
+
+;
+; FCMP OGE
+;
+
+define void @fcmp_oge_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
+; CHECK-LABEL: fcmp_oge_v16f16:
+; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
+; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
+; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
+; CHECK-NEXT: fcmge [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
+; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
+; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
+; CHECK-NEXT: ret
+ %op1 = load <16 x half>, <16 x half>* %a
+ %op2 = load <16 x half>, <16 x half>* %b
+ %cmp = fcmp oge <16 x half> %op1, %op2
+ %sext = sext <16 x i1> %cmp to <16 x i16>
+ store <16 x i16> %sext, <16 x i16>* %c
+ ret void
+}
+
+;
+; FCMP UGE
+;
+
+define void @fcmp_uge_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
+; CHECK-LABEL: fcmp_uge_v16f16:
+; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
+; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
+; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
+; CHECK-NEXT: mov [[INV:w[0-9]+]], #65535
+; CHECK-NEXT: fcmgt [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP2]].h, [[OP1]].h
+; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
+; CHECK-NEXT: mov [[TMP:z[0-9]+]].h, [[INV]]
+; CHECK-NEXT: eor [[SEXT]].d, [[SEXT]].d, [[TMP]].d
+; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
+; CHECK-NEXT: ret
+ %op1 = load <16 x half>, <16 x half>* %a
+ %op2 = load <16 x half>, <16 x half>* %b
+ %cmp = fcmp uge <16 x half> %op1, %op2
+ %sext = sext <16 x i1> %cmp to <16 x i16>
+ store <16 x i16> %sext, <16 x i16>* %c
+ ret void
+}
+
+;
+; FCMP OLE
+;
+
+define void @fcmp_ole_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
+; CHECK-LABEL: fcmp_ole_v16f16:
+; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
+; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
+; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
+; CHECK-NEXT: fcmge [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP2]].h, [[OP1]].h
+; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
+; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
+; CHECK-NEXT: ret
+ %op1 = load <16 x half>, <16 x half>* %a
+ %op2 = load <16 x half>, <16 x half>* %b
+ %cmp = fcmp ole <16 x half> %op1, %op2
+ %sext = sext <16 x i1> %cmp to <16 x i16>
+ store <16 x i16> %sext, <16 x i16>* %c
+ ret void
+}
+
+;
+; FCMP ULE
+;
+
+define void @fcmp_ule_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
+; CHECK-LABEL: fcmp_ule_v16f16:
+; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
+; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
+; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
+; CHECK-NEXT: mov [[INV:w[0-9]+]], #65535
+; CHECK-NEXT: fcmgt [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
+; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
+; CHECK-NEXT: mov [[TMP:z[0-9]+]].h, [[INV]]
+; CHECK-NEXT: eor [[SEXT]].d, [[SEXT]].d, [[TMP]].d
+; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
+; CHECK-NEXT: ret
+ %op1 = load <16 x half>, <16 x half>* %a
+ %op2 = load <16 x half>, <16 x half>* %b
+ %cmp = fcmp ule <16 x half> %op1, %op2
+ %sext = sext <16 x i1> %cmp to <16 x i16>
+ store <16 x i16> %sext, <16 x i16>* %c
+ ret void
+}
+
+;
+; FCMP UNO
+;
+
+define void @fcmp_uno_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
+; CHECK-LABEL: fcmp_uno_v16f16:
+; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
+; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
+; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
+; CHECK-NEXT: fcmuo [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
+; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
+; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
+; CHECK-NEXT: ret
+ %op1 = load <16 x half>, <16 x half>* %a
+ %op2 = load <16 x half>, <16 x half>* %b
+ %cmp = fcmp uno <16 x half> %op1, %op2
+ %sext = sext <16 x i1> %cmp to <16 x i16>
+ store <16 x i16> %sext, <16 x i16>* %c
+ ret void
+}
+
+;
+; FCMP ORD
+;
+
+define void @fcmp_ord_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
+; CHECK-LABEL: fcmp_ord_v16f16:
+; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
+; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
+; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
+; CHECK-NEXT: mov [[INV:w[0-9]+]], #65535
+; CHECK-NEXT: fcmuo [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
+; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
+; CHECK-NEXT: mov [[TMP:z[0-9]+]].h, [[INV]]
+; CHECK-NEXT: eor [[SEXT]].d, [[SEXT]].d, [[TMP]].d
+; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
+; CHECK-NEXT: ret
+ %op1 = load <16 x half>, <16 x half>* %a
+ %op2 = load <16 x half>, <16 x half>* %b
+ %cmp = fcmp ord <16 x half> %op1, %op2
+ %sext = sext <16 x i1> %cmp to <16 x i16>
+ store <16 x i16> %sext, <16 x i16>* %c
+ ret void
+}
+
+;
+; FCMP EQ
+;
+
+define void @fcmp_eq_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
+; CHECK-LABEL: fcmp_eq_v16f16:
+; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
+; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
+; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
+; CHECK-NEXT: fcmeq [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
+; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
+; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
+; CHECK-NEXT: ret
+ %op1 = load <16 x half>, <16 x half>* %a
+ %op2 = load <16 x half>, <16 x half>* %b
+ %cmp = fcmp fast oeq <16 x half> %op1, %op2
+ %sext = sext <16 x i1> %cmp to <16 x i16>
+ store <16 x i16> %sext, <16 x i16>* %c
+ ret void
+}
+
+;
+; FCMP NE
+;
+
+define void @fcmp_ne_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
+; CHECK-LABEL: fcmp_ne_v16f16:
+; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
+; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
+; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
+; CHECK-NEXT: fcmne [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
+; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
+; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
+; CHECK-NEXT: ret
+ %op1 = load <16 x half>, <16 x half>* %a
+ %op2 = load <16 x half>, <16 x half>* %b
+ %cmp = fcmp fast one <16 x half> %op1, %op2
+ %sext = sext <16 x i1> %cmp to <16 x i16>
+ store <16 x i16> %sext, <16 x i16>* %c
+ ret void
+}
+
+;
+; FCMP GT
+;
+
+define void @fcmp_gt_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
+; CHECK-LABEL: fcmp_gt_v16f16:
+; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
+; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
+; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
+; CHECK-NEXT: fcmgt [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
+; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
+; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
+; CHECK-NEXT: ret
+ %op1 = load <16 x half>, <16 x half>* %a
+ %op2 = load <16 x half>, <16 x half>* %b
+ %cmp = fcmp fast ogt <16 x half> %op1, %op2
+ %sext = sext <16 x i1> %cmp to <16 x i16>
+ store <16 x i16> %sext, <16 x i16>* %c
+ ret void
+}
+
+;
+; FCMP LT
+;
+
+define void @fcmp_lt_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
+; CHECK-LABEL: fcmp_lt_v16f16:
+; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
+; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
+; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
+; CHECK-NEXT: fcmgt [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP2]].h, [[OP1]].h
+; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
+; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
+; CHECK-NEXT: ret
+ %op1 = load <16 x half>, <16 x half>* %a
+ %op2 = load <16 x half>, <16 x half>* %b
+ %cmp = fcmp fast olt <16 x half> %op1, %op2
+ %sext = sext <16 x i1> %cmp to <16 x i16>
+ store <16 x i16> %sext, <16 x i16>* %c
+ ret void
+}
+
+;
+; FCMP GE
+;
+
+define void @fcmp_ge_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
+; CHECK-LABEL: fcmp_ge_v16f16:
+; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
+; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
+; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
+; CHECK-NEXT: fcmge [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
+; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
+; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
+; CHECK-NEXT: ret
+ %op1 = load <16 x half>, <16 x half>* %a
+ %op2 = load <16 x half>, <16 x half>* %b
+ %cmp = fcmp fast oge <16 x half> %op1, %op2
+ %sext = sext <16 x i1> %cmp to <16 x i16>
+ store <16 x i16> %sext, <16 x i16>* %c
+ ret void
+}
+
+;
+; FCMP LE
+;
+
+define void @fcmp_le_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
+; CHECK-LABEL: fcmp_le_v16f16:
+; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
+; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
+; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
+; CHECK-NEXT: fcmge [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP2]].h, [[OP1]].h
+; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
+; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
+; CHECK-NEXT: ret
+ %op1 = load <16 x half>, <16 x half>* %a
+ %op2 = load <16 x half>, <16 x half>* %b
+ %cmp = fcmp fast ole <16 x half> %op1, %op2
+ %sext = sext <16 x i1> %cmp to <16 x i16>
+ store <16 x i16> %sext, <16 x i16>* %c
+ ret void
+}
+
+attributes #0 = { "target-features"="+sve" }
diff --git a/llvm/test/CodeGen/PowerPC/vsx.ll b/llvm/test/CodeGen/PowerPC/vsx.ll
index 73cca71b539c..43bb0469cc91 100644
--- a/llvm/test/CodeGen/PowerPC/vsx.ll
+++ b/llvm/test/CodeGen/PowerPC/vsx.ll
@@ -626,32 +626,32 @@ define <4 x float> @test22(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvcmpgtsp vs0, v5, v4
; CHECK-NEXT: xvcmpgtsp vs1, v4, v5
-; CHECK-NEXT: xxlnor vs0, vs1, vs0
-; CHECK-NEXT: xxsel v2, v3, v2, vs0
+; CHECK-NEXT: xxlor vs0, vs1, vs0
+; CHECK-NEXT: xxsel v2, v2, v3, vs0
; CHECK-NEXT: blr
;
; CHECK-REG-LABEL: test22:
; CHECK-REG: # %bb.0: # %entry
; CHECK-REG-NEXT: xvcmpgtsp vs0, v5, v4
; CHECK-REG-NEXT: xvcmpgtsp vs1, v4, v5
-; CHECK-REG-NEXT: xxlnor vs0, vs1, vs0
-; CHECK-REG-NEXT: xxsel v2, v3, v2, vs0
+; CHECK-REG-NEXT: xxlor vs0, vs1, vs0
+; CHECK-REG-NEXT: xxsel v2, v2, v3, vs0
; CHECK-REG-NEXT: blr
;
; CHECK-FISL-LABEL: test22:
; CHECK-FISL: # %bb.0: # %entry
; CHECK-FISL-NEXT: xvcmpgtsp vs1, v5, v4
; CHECK-FISL-NEXT: xvcmpgtsp vs0, v4, v5
-; CHECK-FISL-NEXT: xxlnor vs0, vs0, vs1
-; CHECK-FISL-NEXT: xxsel v2, v3, v2, vs0
+; CHECK-FISL-NEXT: xxlor vs0, vs0, vs1
+; CHECK-FISL-NEXT: xxsel v2, v2, v3, vs0
; CHECK-FISL-NEXT: blr
;
; CHECK-LE-LABEL: test22:
; CHECK-LE: # %bb.0: # %entry
; CHECK-LE-NEXT: xvcmpgtsp vs0, v5, v4
; CHECK-LE-NEXT: xvcmpgtsp vs1, v4, v5
-; CHECK-LE-NEXT: xxlnor vs0, vs1, vs0
-; CHECK-LE-NEXT: xxsel v2, v3, v2, vs0
+; CHECK-LE-NEXT: xxlor vs0, vs1, vs0
+; CHECK-LE-NEXT: xxsel v2, v2, v3, vs0
; CHECK-LE-NEXT: blr
entry:
%m = fcmp ueq <4 x float> %c, %d
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-setcc.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-setcc.ll
index 8c880c236bb7..6c14ed5f738f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-setcc.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-setcc.ll
@@ -285,9 +285,8 @@ define void @fcmp_ult_vv_v8f64(<8 x double>* %x, <8 x double>* %y, <8 x i1>* %z)
; CHECK-NEXT: vle64.v v28, (a0)
; CHECK-NEXT: vle64.v v8, (a1)
; CHECK-NEXT: vmfle.vv v25, v8, v28
-; CHECK-NEXT: vsetivli a0, 8, e8,mf2,ta,mu
-; CHECK-NEXT: vmnand.mm v25, v25, v25
; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmnand.mm v25, v25, v25
; CHECK-NEXT: vse1.v v25, (a2)
; CHECK-NEXT: ret
%a = load <8 x double>, <8 x double>* %x
@@ -358,8 +357,8 @@ define void @fcmp_ueq_vv_v32f32(<32 x float>* %x, <32 x float>* %y, <32 x i1>* %
; CHECK-NEXT: vsetvli a4, a3, e32,m8,ta,mu
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vle32.v v16, (a1)
-; CHECK-NEXT: vmflt.vv v25, v16, v8
-; CHECK-NEXT: vmflt.vv v26, v8, v16
+; CHECK-NEXT: vmflt.vv v25, v8, v16
+; CHECK-NEXT: vmflt.vv v26, v16, v8
; CHECK-NEXT: vsetvli a0, a3, e8,m2,ta,mu
; CHECK-NEXT: vmnor.mm v25, v26, v25
; CHECK-NEXT: vse1.v v25, (a2)
@@ -395,8 +394,8 @@ define void @fcmp_one_vv_v8f64(<16 x double>* %x, <16 x double>* %y, <16 x i1>*
; CHECK-NEXT: vsetivli a3, 16, e64,m8,ta,mu
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vle64.v v16, (a1)
-; CHECK-NEXT: vmflt.vv v25, v16, v8
-; CHECK-NEXT: vmflt.vv v26, v8, v16
+; CHECK-NEXT: vmflt.vv v25, v8, v16
+; CHECK-NEXT: vmflt.vv v26, v16, v8
; CHECK-NEXT: vsetivli a0, 16, e8,m1,ta,mu
; CHECK-NEXT: vmor.mm v25, v26, v25
; CHECK-NEXT: vse1.v v25, (a2)
@@ -433,9 +432,8 @@ define void @fcmp_ord_vv_v4f16(<4 x half>* %x, <4 x half>* %y, <4 x i1>* %z) {
; CHECK-NEXT: vle16.v v26, (a0)
; CHECK-NEXT: vmfeq.vv v27, v25, v25
; CHECK-NEXT: vmfeq.vv v25, v26, v26
-; CHECK-NEXT: vsetivli a0, 4, e8,mf2,ta,mu
-; CHECK-NEXT: vmand.mm v25, v25, v27
; CHECK-NEXT: vsetivli a0, 4, e8,m1,ta,mu
+; CHECK-NEXT: vmand.mm v25, v25, v27
; CHECK-NEXT: vse1.v v25, (a2)
; CHECK-NEXT: ret
%a = load <4 x half>, <4 x half>* %x
@@ -453,9 +451,8 @@ define void @fcmp_uno_vv_v4f16(<2 x half>* %x, <2 x half>* %y, <2 x i1>* %z) {
; CHECK-NEXT: vle16.v v26, (a0)
; CHECK-NEXT: vmfne.vv v27, v25, v25
; CHECK-NEXT: vmfne.vv v25, v26, v26
-; CHECK-NEXT: vsetivli a0, 2, e8,mf2,ta,mu
-; CHECK-NEXT: vmor.mm v25, v25, v27
; CHECK-NEXT: vsetivli a0, 2, e8,m1,ta,mu
+; CHECK-NEXT: vmor.mm v25, v25, v27
; CHECK-NEXT: vse1.v v25, (a2)
; CHECK-NEXT: ret
%a = load <2 x half>, <2 x half>* %x
@@ -747,9 +744,8 @@ define void @fcmp_ult_vf_v8f64(<8 x double>* %x, double %y, <8 x i1>* %z) {
; CHECK-NEXT: vsetivli a2, 8, e64,m4,ta,mu
; CHECK-NEXT: vle64.v v28, (a0)
; CHECK-NEXT: vmfge.vf v25, v28, fa0
-; CHECK-NEXT: vsetivli a0, 8, e8,mf2,ta,mu
-; CHECK-NEXT: vmnand.mm v25, v25, v25
; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmnand.mm v25, v25, v25
; CHECK-NEXT: vse1.v v25, (a1)
; CHECK-NEXT: ret
%a = load <8 x double>, <8 x double>* %x
@@ -820,8 +816,8 @@ define void @fcmp_ueq_vf_v32f32(<32 x float>* %x, float %y, <32 x i1>* %z) {
; CHECK-NEXT: addi a2, zero, 32
; CHECK-NEXT: vsetvli a3, a2, e32,m8,ta,mu
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vmfgt.vf v25, v8, fa0
-; CHECK-NEXT: vmflt.vf v26, v8, fa0
+; CHECK-NEXT: vmflt.vf v25, v8, fa0
+; CHECK-NEXT: vmfgt.vf v26, v8, fa0
; CHECK-NEXT: vsetvli a0, a2, e8,m2,ta,mu
; CHECK-NEXT: vmnor.mm v25, v26, v25
; CHECK-NEXT: vse1.v v25, (a1)
@@ -857,8 +853,8 @@ define void @fcmp_one_vf_v8f64(<16 x double>* %x, double %y, <16 x i1>* %z) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli a2, 16, e64,m8,ta,mu
; CHECK-NEXT: vle64.v v8, (a0)
-; CHECK-NEXT: vmfgt.vf v25, v8, fa0
-; CHECK-NEXT: vmflt.vf v26, v8, fa0
+; CHECK-NEXT: vmflt.vf v25, v8, fa0
+; CHECK-NEXT: vmfgt.vf v26, v8, fa0
; CHECK-NEXT: vsetivli a0, 16, e8,m1,ta,mu
; CHECK-NEXT: vmor.mm v25, v26, v25
; CHECK-NEXT: vse1.v v25, (a1)
@@ -896,9 +892,8 @@ define void @fcmp_ord_vf_v4f16(<4 x half>* %x, half %y, <4 x i1>* %z) {
; CHECK-NEXT: vfmv.v.f v26, fa0
; CHECK-NEXT: vmfeq.vf v27, v26, fa0
; CHECK-NEXT: vmfeq.vv v26, v25, v25
-; CHECK-NEXT: vsetivli a0, 4, e8,mf2,ta,mu
-; CHECK-NEXT: vmand.mm v25, v26, v27
; CHECK-NEXT: vsetivli a0, 4, e8,m1,ta,mu
+; CHECK-NEXT: vmand.mm v25, v26, v27
; CHECK-NEXT: vse1.v v25, (a1)
; CHECK-NEXT: ret
%a = load <4 x half>, <4 x half>* %x
@@ -917,9 +912,8 @@ define void @fcmp_uno_vf_v4f16(<2 x half>* %x, half %y, <2 x i1>* %z) {
; CHECK-NEXT: vfmv.v.f v26, fa0
; CHECK-NEXT: vmfne.vf v27, v26, fa0
; CHECK-NEXT: vmfne.vv v26, v25, v25
-; CHECK-NEXT: vsetivli a0, 2, e8,mf2,ta,mu
-; CHECK-NEXT: vmor.mm v25, v26, v27
; CHECK-NEXT: vsetivli a0, 2, e8,m1,ta,mu
+; CHECK-NEXT: vmor.mm v25, v26, v27
; CHECK-NEXT: vse1.v v25, (a1)
; CHECK-NEXT: ret
%a = load <2 x half>, <2 x half>* %x
@@ -1212,9 +1206,8 @@ define void @fcmp_ult_fv_v8f64(<8 x double>* %x, double %y, <8 x i1>* %z) {
; CHECK-NEXT: vsetivli a2, 8, e64,m4,ta,mu
; CHECK-NEXT: vle64.v v28, (a0)
; CHECK-NEXT: vmfle.vf v25, v28, fa0
-; CHECK-NEXT: vsetivli a0, 8, e8,mf2,ta,mu
-; CHECK-NEXT: vmnand.mm v25, v25, v25
; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmnand.mm v25, v25, v25
; CHECK-NEXT: vse1.v v25, (a1)
; CHECK-NEXT: ret
%a = load <8 x double>, <8 x double>* %x
@@ -1285,8 +1278,8 @@ define void @fcmp_ueq_fv_v32f32(<32 x float>* %x, float %y, <32 x i1>* %z) {
; CHECK-NEXT: addi a2, zero, 32
; CHECK-NEXT: vsetvli a3, a2, e32,m8,ta,mu
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vmflt.vf v25, v8, fa0
-; CHECK-NEXT: vmfgt.vf v26, v8, fa0
+; CHECK-NEXT: vmfgt.vf v25, v8, fa0
+; CHECK-NEXT: vmflt.vf v26, v8, fa0
; CHECK-NEXT: vsetvli a0, a2, e8,m2,ta,mu
; CHECK-NEXT: vmnor.mm v25, v26, v25
; CHECK-NEXT: vse1.v v25, (a1)
@@ -1322,8 +1315,8 @@ define void @fcmp_one_fv_v8f64(<16 x double>* %x, double %y, <16 x i1>* %z) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli a2, 16, e64,m8,ta,mu
; CHECK-NEXT: vle64.v v8, (a0)
-; CHECK-NEXT: vmflt.vf v25, v8, fa0
-; CHECK-NEXT: vmfgt.vf v26, v8, fa0
+; CHECK-NEXT: vmfgt.vf v25, v8, fa0
+; CHECK-NEXT: vmflt.vf v26, v8, fa0
; CHECK-NEXT: vsetivli a0, 16, e8,m1,ta,mu
; CHECK-NEXT: vmor.mm v25, v26, v25
; CHECK-NEXT: vse1.v v25, (a1)
@@ -1359,11 +1352,10 @@ define void @fcmp_ord_fv_v4f16(<4 x half>* %x, half %y, <4 x i1>* %z) {
; CHECK-NEXT: vsetivli a2, 4, e16,m1,ta,mu
; CHECK-NEXT: vle16.v v25, (a0)
; CHECK-NEXT: vfmv.v.f v26, fa0
-; CHECK-NEXT: vmfeq.vf v27, v26, fa0
-; CHECK-NEXT: vmfeq.vv v26, v25, v25
-; CHECK-NEXT: vsetivli a0, 4, e8,mf2,ta,mu
-; CHECK-NEXT: vmand.mm v25, v27, v26
+; CHECK-NEXT: vmfeq.vv v27, v25, v25
+; CHECK-NEXT: vmfeq.vf v25, v26, fa0
; CHECK-NEXT: vsetivli a0, 4, e8,m1,ta,mu
+; CHECK-NEXT: vmand.mm v25, v25, v27
; CHECK-NEXT: vse1.v v25, (a1)
; CHECK-NEXT: ret
%a = load <4 x half>, <4 x half>* %x
@@ -1380,11 +1372,10 @@ define void @fcmp_uno_fv_v4f16(<2 x half>* %x, half %y, <2 x i1>* %z) {
; CHECK-NEXT: vsetivli a2, 2, e16,m1,ta,mu
; CHECK-NEXT: vle16.v v25, (a0)
; CHECK-NEXT: vfmv.v.f v26, fa0
-; CHECK-NEXT: vmfne.vf v27, v26, fa0
-; CHECK-NEXT: vmfne.vv v26, v25, v25
-; CHECK-NEXT: vsetivli a0, 2, e8,mf2,ta,mu
-; CHECK-NEXT: vmor.mm v25, v27, v26
+; CHECK-NEXT: vmfne.vv v27, v25, v25
+; CHECK-NEXT: vmfne.vf v25, v26, fa0
; CHECK-NEXT: vsetivli a0, 2, e8,m1,ta,mu
+; CHECK-NEXT: vmor.mm v25, v25, v27
; CHECK-NEXT: vse1.v v25, (a1)
; CHECK-NEXT: ret
%a = load <2 x half>, <2 x half>* %x