[llvm] 9745dce - [SelectionDAG][AArch64][SVE] Perform SETCC condition legalization in LegalizeVectorOps
Bradley Smith via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 29 07:42:43 PDT 2021
Author: Bradley Smith
Date: 2021-03-29T15:32:25+01:00
New Revision: 9745dce8c3dc6996dae9bab11ff410a12d8daec7
URL: https://github.com/llvm/llvm-project/commit/9745dce8c3dc6996dae9bab11ff410a12d8daec7
DIFF: https://github.com/llvm/llvm-project/commit/9745dce8c3dc6996dae9bab11ff410a12d8daec7.diff
LOG: [SelectionDAG][AArch64][SVE] Perform SETCC condition legalization in LegalizeVectorOps
This is currently performed in SelectionDAGLegalize; here we make it also
happen in LegalizeVectorOps, allowing a target to lower the SETCC condition
codes first in LegalizeVectorOps and then lower to a custom node afterwards,
without having to duplicate all of the SETCC condition legalization in the
target-specific lowering.
As a result of this, fixed-length floating-point SETCC nodes can now be
properly lowered for SVE.
Differential Revision: https://reviews.llvm.org/D98939
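For illustration, the target-side pattern this change enables looks roughly
like the following. This is a hand-written sketch, not code from the patch:
MyTargetLowering and addTypeForFixedLengthVector are hypothetical names,
while setCondCodeAction and setOperationAction are the real TargetLoweringBase
hooks used by the AArch64 change below.

    void MyTargetLowering::addTypeForFixedLengthVector(MVT VT) {
      if (VT.isFloatingPoint()) {
        // Ask generic LegalizeVectorOps to rewrite these condition codes
        // (by swapping operands, inverting the code, or splitting into an
        // AND/OR of two SETCCs) before the node reaches the target.
        setCondCodeAction(ISD::SETUEQ, VT, Expand);
        setCondCodeAction(ISD::SETONE, VT, Expand);
      }
      // Only SETCC nodes with legal condition codes arrive here.
      setOperationAction(ISD::SETCC, VT, Custom);
    }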
Added:
llvm/test/CodeGen/AArch64/sve-fixed-length-float-compares.ll
Modified:
llvm/include/llvm/CodeGen/TargetLowering.h
llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
llvm/lib/Target/AArch64/SVEInstrFormats.td
llvm/test/CodeGen/PowerPC/vsx.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-setcc.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 793acb6263d2..4b964dc26218 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -4513,6 +4513,29 @@ class TargetLowering : public TargetLoweringBase {
/// method accepts vectors as its arguments.
SDValue expandVectorSplice(SDNode *Node, SelectionDAG &DAG) const;
+ /// Legalize a SETCC with given LHS and RHS and condition code CC on the
+ /// current target.
+ ///
+ /// If the SETCC has been legalized using AND / OR, then the legalized node
+ /// will be stored in LHS. RHS and CC will be set to SDValue(). NeedInvert
+ /// will be set to false.
+ ///
+ /// If the SETCC has been legalized by using getSetCCSwappedOperands(),
+ /// then the values of LHS and RHS will be swapped, CC will be set to the
+ /// new condition, and NeedInvert will be set to false.
+ ///
+ /// If the SETCC has been legalized using the inverse condcode, then LHS and
+ /// RHS will be unchanged, CC will be set to the inverted condcode, and
+ /// NeedInvert will be set to true. The caller must invert the result of the
+ /// SETCC with SelectionDAG::getLogicalNOT() or take equivalent action to swap
+ /// the effect of a true/false result.
+ ///
+ /// \returns true if the SetCC has been legalized, false if it hasn't.
+ bool LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT, SDValue &LHS,
+ SDValue &RHS, SDValue &CC, bool &NeedInvert,
+ const SDLoc &dl, SDValue &Chain,
+ bool IsSignaling = false) const;
+
//===--------------------------------------------------------------------===//
// Instruction Emitting Hooks
//
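Since the hook communicates its result through its reference parameters, a
caller is expected to rebuild the node afterwards. A minimal sketch of the
consuming side, mirroring how the LegalizeVectorOps caller later in this
patch uses it (N, DAG and TLI are assumed to be the SETCC node, the
SelectionDAG and the TargetLowering already in scope):

    SDValue LHS = N->getOperand(0), RHS = N->getOperand(1), CC = N->getOperand(2);
    SDValue Chain; // a plain SETCC has no chain
    bool NeedInvert;
    if (TLI.LegalizeSetCCCondCode(DAG, N->getValueType(0), LHS, RHS, CC,
                                  NeedInvert, SDLoc(N), Chain)) {
      // Swapped operands and/or a new condition code: re-emit the SETCC.
      // If CC is null, the AND/OR expansion already left the result in LHS.
      if (CC.getNode())
        LHS = DAG.getNode(ISD::SETCC, SDLoc(N), N->getValueType(0), LHS, RHS, CC);
      // Inverted condition code: undo the inversion on the result.
      if (NeedInvert)
        LHS = DAG.getLogicalNOT(SDLoc(N), LHS, LHS.getValueType());
    }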
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 1a131bf32ef1..7877b8ca3a13 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -133,10 +133,6 @@ class SelectionDAGLegalize {
SDValue N1, SDValue N2,
ArrayRef<int> Mask) const;
- bool LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC,
- bool &NeedInvert, const SDLoc &dl, SDValue &Chain,
- bool IsSignaling = false);
-
SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned);
void ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32,
@@ -1685,152 +1681,6 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node,
Results.push_back(Tmp2);
}
-/// Legalize a SETCC with given LHS and RHS and condition code CC on the current
-/// target.
-///
-/// If the SETCC has been legalized using AND / OR, then the legalized node
-/// will be stored in LHS. RHS and CC will be set to SDValue(). NeedInvert
-/// will be set to false.
-///
-/// If the SETCC has been legalized by using getSetCCSwappedOperands(),
-/// then the values of LHS and RHS will be swapped, CC will be set to the
-/// new condition, and NeedInvert will be set to false.
-///
-/// If the SETCC has been legalized using the inverse condcode, then LHS and
-/// RHS will be unchanged, CC will set to the inverted condcode, and NeedInvert
-/// will be set to true. The caller must invert the result of the SETCC with
-/// SelectionDAG::getLogicalNOT() or take equivalent action to swap the effect
-/// of a true/false result.
-///
-/// \returns true if the SetCC has been legalized, false if it hasn't.
-bool SelectionDAGLegalize::LegalizeSetCCCondCode(
- EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, bool &NeedInvert,
- const SDLoc &dl, SDValue &Chain, bool IsSignaling) {
- MVT OpVT = LHS.getSimpleValueType();
- ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
- NeedInvert = false;
- switch (TLI.getCondCodeAction(CCCode, OpVT)) {
- default: llvm_unreachable("Unknown condition code action!");
- case TargetLowering::Legal:
- // Nothing to do.
- break;
- case TargetLowering::Expand: {
- ISD::CondCode InvCC = ISD::getSetCCSwappedOperands(CCCode);
- if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
- std::swap(LHS, RHS);
- CC = DAG.getCondCode(InvCC);
- return true;
- }
- // Swapping operands didn't work. Try inverting the condition.
- bool NeedSwap = false;
- InvCC = getSetCCInverse(CCCode, OpVT);
- if (!TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
- // If inverting the condition is not enough, try swapping operands
- // on top of it.
- InvCC = ISD::getSetCCSwappedOperands(InvCC);
- NeedSwap = true;
- }
- if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
- CC = DAG.getCondCode(InvCC);
- NeedInvert = true;
- if (NeedSwap)
- std::swap(LHS, RHS);
- return true;
- }
-
- ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
- unsigned Opc = 0;
- switch (CCCode) {
- default: llvm_unreachable("Don't know how to expand this condition!");
- case ISD::SETUO:
- if (TLI.isCondCodeLegal(ISD::SETUNE, OpVT)) {
- CC1 = ISD::SETUNE; CC2 = ISD::SETUNE; Opc = ISD::OR;
- break;
- }
- assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) &&
- "If SETUE is expanded, SETOEQ or SETUNE must be legal!");
- NeedInvert = true;
- LLVM_FALLTHROUGH;
- case ISD::SETO:
- assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT)
- && "If SETO is expanded, SETOEQ must be legal!");
- CC1 = ISD::SETOEQ; CC2 = ISD::SETOEQ; Opc = ISD::AND; break;
- case ISD::SETONE:
- case ISD::SETUEQ:
- // If the SETUO or SETO CC isn't legal, we might be able to use
- // SETOGT || SETOLT, inverting the result for SETUEQ. We only need one
- // of SETOGT/SETOLT to be legal, the other can be emulated by swapping
- // the operands.
- CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
- if (!TLI.isCondCodeLegal(CC2, OpVT) &&
- (TLI.isCondCodeLegal(ISD::SETOGT, OpVT) ||
- TLI.isCondCodeLegal(ISD::SETOLT, OpVT))) {
- CC1 = ISD::SETOGT;
- CC2 = ISD::SETOLT;
- Opc = ISD::OR;
- NeedInvert = ((unsigned)CCCode & 0x8U);
- break;
- }
- LLVM_FALLTHROUGH;
- case ISD::SETOEQ:
- case ISD::SETOGT:
- case ISD::SETOGE:
- case ISD::SETOLT:
- case ISD::SETOLE:
- case ISD::SETUNE:
- case ISD::SETUGT:
- case ISD::SETUGE:
- case ISD::SETULT:
- case ISD::SETULE:
- // If we are floating point, assign and break, otherwise fall through.
- if (!OpVT.isInteger()) {
- // We can use the 4th bit to tell if we are the unordered
- // or ordered version of the opcode.
- CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
- Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
- CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
- break;
- }
- // Fallthrough if we are unsigned integer.
- LLVM_FALLTHROUGH;
- case ISD::SETLE:
- case ISD::SETGT:
- case ISD::SETGE:
- case ISD::SETLT:
- case ISD::SETNE:
- case ISD::SETEQ:
- // If all combinations of inverting the condition and swapping operands
- // didn't work then we have no means to expand the condition.
- llvm_unreachable("Don't know how to expand this condition!");
- }
-
- SDValue SetCC1, SetCC2;
- if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
- // If we aren't the ordered or unorder operation,
- // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
- SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain,
- IsSignaling);
- SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain,
- IsSignaling);
- } else {
- // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
- SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain,
- IsSignaling);
- SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain,
- IsSignaling);
- }
- if (Chain)
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1),
- SetCC2.getValue(1));
- LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
- RHS = SDValue();
- CC = SDValue();
- return true;
- }
- }
- return false;
-}
-
/// Emit a store/load combination to the stack. This stores
/// SrcOp to a stack slot of type SlotVT, truncating it if needed. It then does
/// a load from the stack slot to DestVT, extending it if needed.
@@ -3729,8 +3579,8 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Tmp2 = Node->getOperand(1 + Offset);
Tmp3 = Node->getOperand(2 + Offset);
bool Legalized =
- LegalizeSetCCCondCode(Node->getValueType(0), Tmp1, Tmp2, Tmp3,
- NeedInvert, dl, Chain, IsSignaling);
+ TLI.LegalizeSetCCCondCode(DAG, Node->getValueType(0), Tmp1, Tmp2, Tmp3,
+ NeedInvert, dl, Chain, IsSignaling);
if (Legalized) {
// If we expanded the SETCC by swapping LHS and RHS, or by inverting the
@@ -3825,8 +3675,9 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
}
if (!Legalized) {
- Legalized = LegalizeSetCCCondCode(getSetCCResultType(Tmp1.getValueType()),
- Tmp1, Tmp2, CC, NeedInvert, dl, Chain);
+ Legalized = TLI.LegalizeSetCCCondCode(
+ DAG, getSetCCResultType(Tmp1.getValueType()), Tmp1, Tmp2, CC,
+ NeedInvert, dl, Chain);
assert(Legalized && "Can't legalize SELECT_CC with legal condition!");
@@ -3860,8 +3711,8 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Tmp4 = Node->getOperand(1); // CC
bool Legalized =
- LegalizeSetCCCondCode(getSetCCResultType(Tmp2.getValueType()), Tmp2,
- Tmp3, Tmp4, NeedInvert, dl, Chain);
+ TLI.LegalizeSetCCCondCode(DAG, getSetCCResultType(Tmp2.getValueType()),
+ Tmp2, Tmp3, Tmp4, NeedInvert, dl, Chain);
(void)Legalized;
assert(Legalized && "Can't legalize BR_CC with legal condition!");
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 1cffe20cbb83..8dd8da352734 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -138,6 +138,7 @@ class VectorLegalizer {
SDValue ExpandStore(SDNode *N);
SDValue ExpandFNEG(SDNode *Node);
void ExpandFSUB(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+ void ExpandSETCC(SDNode *Node, SmallVectorImpl<SDValue> &Results);
void ExpandBITREVERSE(SDNode *Node, SmallVectorImpl<SDValue> &Results);
void ExpandUADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
void ExpandSADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
@@ -396,7 +397,6 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::SELECT:
case ISD::VSELECT:
case ISD::SELECT_CC:
- case ISD::SETCC:
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND:
case ISD::TRUNCATE:
@@ -495,6 +495,14 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
Action = TLI.getOperationAction(Node->getOpcode(),
Node->getOperand(1).getValueType());
break;
+ case ISD::SETCC: {
+ MVT OpVT = Node->getOperand(0).getSimpleValueType();
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(2))->get();
+ Action = TLI.getCondCodeAction(CCCode, OpVT);
+ if (Action == TargetLowering::Legal)
+ Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
+ break;
+ }
}
LLVM_DEBUG(dbgs() << "\nLegalizing vector op: "; Node->dump(&DAG));
@@ -762,7 +770,7 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
ExpandFSUB(Node, Results);
return;
case ISD::SETCC:
- Results.push_back(UnrollVSETCC(Node));
+ ExpandSETCC(Node, Results);
return;
case ISD::ABS:
if (TLI.expandABS(Node, Tmp, DAG)) {
@@ -1331,6 +1339,50 @@ void VectorLegalizer::ExpandFSUB(SDNode *Node,
Results.push_back(Tmp);
}
+void VectorLegalizer::ExpandSETCC(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
+ bool NeedInvert = false;
+ SDLoc dl(Node);
+ MVT OpVT = Node->getOperand(0).getSimpleValueType();
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(2))->get();
+
+ if (TLI.getCondCodeAction(CCCode, OpVT) != TargetLowering::Expand) {
+ Results.push_back(UnrollVSETCC(Node));
+ return;
+ }
+
+ SDValue Chain;
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+ SDValue CC = Node->getOperand(2);
+ bool Legalized = TLI.LegalizeSetCCCondCode(DAG, Node->getValueType(0), LHS,
+ RHS, CC, NeedInvert, dl, Chain);
+
+ if (Legalized) {
+ // If we expanded the SETCC by swapping LHS and RHS, or by inverting the
+ // condition code, create a new SETCC node.
+ if (CC.getNode())
+ LHS = DAG.getNode(ISD::SETCC, dl, Node->getValueType(0), LHS, RHS, CC,
+ Node->getFlags());
+
+ // If we expanded the SETCC by inverting the condition code, then wrap
+ // the existing SETCC in a NOT to restore the intended condition.
+ if (NeedInvert)
+ LHS = DAG.getLogicalNOT(dl, LHS, LHS->getValueType(0));
+ } else {
+ // Otherwise, SETCC for the given comparison type must be completely
+ // illegal; expand it into a SELECT_CC.
+ EVT VT = Node->getValueType(0);
+ LHS =
+ DAG.getNode(ISD::SELECT_CC, dl, VT, LHS, RHS,
+ DAG.getBoolConstant(true, dl, VT, LHS.getValueType()),
+ DAG.getBoolConstant(false, dl, VT, LHS.getValueType()), CC);
+ LHS->setFlags(Node->getFlags());
+ }
+
+ Results.push_back(LHS);
+}
+
void VectorLegalizer::ExpandUADDSUBO(SDNode *Node,
SmallVectorImpl<SDValue> &Results) {
SDValue Result, Overflow;
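The SETONE/SETUEQ branch of the expansion this calls into (implemented in
LegalizeSetCCCondCode, moved to TargetLowering.cpp below) leans on an IEEE
identity: ordered comparisons are false whenever either operand is NaN, so
NOT(OGT || OLT) is exactly UEQ. A self-contained C++ check of that identity,
independent of LLVM and purely illustrative:

    #include <cassert>
    #include <cmath>

    // fcmp ueq: equal, or unordered (at least one operand is NaN).
    static bool ueq_reference(float a, float b) {
      return a == b || std::isnan(a) || std::isnan(b);
    }

    // The expansion used when only SETOGT/SETOLT are legal:
    // SETUEQ == NOT (SETOGT OR SETOLT).
    static bool ueq_expanded(float a, float b) {
      return !(a > b || a < b);
    }

    int main() {
      const float vals[] = {0.0f, -0.0f, 1.0f, -1.0f, std::nanf("")};
      for (float a : vals)
        for (float b : vals)
          assert(ueq_expanded(a, b) == ueq_reference(a, b));
      return 0;
    }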
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index b02a65e91ff3..04067ffd2303 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8698,3 +8698,137 @@ SDValue TargetLowering::expandVectorSplice(SDNode *Node,
return DAG.getLoad(VT, DL, StoreV2, StackPtr2,
MachinePointerInfo::getUnknownStack(MF));
}
+
+bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT,
+ SDValue &LHS, SDValue &RHS,
+ SDValue &CC, bool &NeedInvert,
+ const SDLoc &dl, SDValue &Chain,
+ bool IsSignaling) const {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ MVT OpVT = LHS.getSimpleValueType();
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
+ NeedInvert = false;
+ switch (TLI.getCondCodeAction(CCCode, OpVT)) {
+ default:
+ llvm_unreachable("Unknown condition code action!");
+ case TargetLowering::Legal:
+ // Nothing to do.
+ break;
+ case TargetLowering::Expand: {
+ ISD::CondCode InvCC = ISD::getSetCCSwappedOperands(CCCode);
+ if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
+ std::swap(LHS, RHS);
+ CC = DAG.getCondCode(InvCC);
+ return true;
+ }
+ // Swapping operands didn't work. Try inverting the condition.
+ bool NeedSwap = false;
+ InvCC = getSetCCInverse(CCCode, OpVT);
+ if (!TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
+ // If inverting the condition is not enough, try swapping operands
+ // on top of it.
+ InvCC = ISD::getSetCCSwappedOperands(InvCC);
+ NeedSwap = true;
+ }
+ if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
+ CC = DAG.getCondCode(InvCC);
+ NeedInvert = true;
+ if (NeedSwap)
+ std::swap(LHS, RHS);
+ return true;
+ }
+
+ ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
+ unsigned Opc = 0;
+ switch (CCCode) {
+ default:
+ llvm_unreachable("Don't know how to expand this condition!");
+ case ISD::SETUO:
+ if (TLI.isCondCodeLegal(ISD::SETUNE, OpVT)) {
+ CC1 = ISD::SETUNE;
+ CC2 = ISD::SETUNE;
+ Opc = ISD::OR;
+ break;
+ }
+ assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) &&
+ "If SETUE is expanded, SETOEQ or SETUNE must be legal!");
+ NeedInvert = true;
+ LLVM_FALLTHROUGH;
+ case ISD::SETO:
+ assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) &&
+ "If SETO is expanded, SETOEQ must be legal!");
+ CC1 = ISD::SETOEQ;
+ CC2 = ISD::SETOEQ;
+ Opc = ISD::AND;
+ break;
+ case ISD::SETONE:
+ case ISD::SETUEQ:
+ // If the SETUO or SETO CC isn't legal, we might be able to use
+ // SETOGT || SETOLT, inverting the result for SETUEQ. We only need one
+ // of SETOGT/SETOLT to be legal, the other can be emulated by swapping
+ // the operands.
+ CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
+ if (!TLI.isCondCodeLegal(CC2, OpVT) &&
+ (TLI.isCondCodeLegal(ISD::SETOGT, OpVT) ||
+ TLI.isCondCodeLegal(ISD::SETOLT, OpVT))) {
+ CC1 = ISD::SETOGT;
+ CC2 = ISD::SETOLT;
+ Opc = ISD::OR;
+ NeedInvert = ((unsigned)CCCode & 0x8U);
+ break;
+ }
+ LLVM_FALLTHROUGH;
+ case ISD::SETOEQ:
+ case ISD::SETOGT:
+ case ISD::SETOGE:
+ case ISD::SETOLT:
+ case ISD::SETOLE:
+ case ISD::SETUNE:
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ case ISD::SETULT:
+ case ISD::SETULE:
+ // If we are floating point, assign and break; otherwise fall through.
+ if (!OpVT.isInteger()) {
+ // We can use the 4th bit to tell if we are the unordered
+ // or ordered version of the opcode.
+ CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
+ Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
+ CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
+ break;
+ }
+ // Fallthrough if we are unsigned integer.
+ LLVM_FALLTHROUGH;
+ case ISD::SETLE:
+ case ISD::SETGT:
+ case ISD::SETGE:
+ case ISD::SETLT:
+ case ISD::SETNE:
+ case ISD::SETEQ:
+ // If all combinations of inverting the condition and swapping operands
+ // didn't work then we have no means to expand the condition.
+ llvm_unreachable("Don't know how to expand this condition!");
+ }
+
+ SDValue SetCC1, SetCC2;
+ if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
+ // If we aren't the ordered or unordered operation,
+ // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
+ SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling);
+ SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling);
+ } else {
+ // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
+ SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling);
+ SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling);
+ }
+ if (Chain)
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1),
+ SetCC2.getValue(1));
+ LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
+ RHS = SDValue();
+ CC = SDValue();
+ return true;
+ }
+ }
+ return false;
+}
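The bit tricks in this function depend on the value layout of ISD::CondCode:
bit 3 (0x8) marks the unordered floating-point codes, and masking to the low
three bits while setting bit 4 (0x10) yields the matching "don't care about
ordering" integer-style code used as CC1. A standalone sketch of that layout
(the enum values are mirrored by hand from ISDOpcodes.h and should be checked
against the real header):

    #include <cassert>

    // Mirrors the value layout of llvm::ISD::CondCode.
    enum CondCode {
      SETFALSE,  SETOEQ, SETOGT, SETOGE, SETOLT, SETOLE, SETONE, SETO,
      SETUO,     SETUEQ, SETUGT, SETUGE, SETULT, SETULE, SETUNE, SETTRUE,
      SETFALSE2, SETEQ,  SETGT,  SETGE,  SETLT,  SETLE,  SETNE,  SETTRUE2
    };

    int main() {
      // Bit 3 distinguishes unordered from ordered FP comparisons.
      assert((SETUGT & 0x8) != 0 && (SETOGT & 0x8) == 0);
      // ((CC & 0x7) | 0x10) maps a floating-point code to its integer
      // counterpart, e.g. SETUGT -> SETGT; this is how CC1 is computed.
      assert(((SETUGT & 0x7) | 0x10) == SETGT);
      assert(((SETOLE & 0x7) | 0x10) == SETLE);
      return 0;
    }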
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 6497d8e9f052..000cbf856c62 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1388,6 +1388,20 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
// We use EXTRACT_SUBVECTOR to "cast" a scalable vector to a fixed length one.
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
+ if (VT.isFloatingPoint()) {
+ setCondCodeAction(ISD::SETO, VT, Expand);
+ setCondCodeAction(ISD::SETOLT, VT, Expand);
+ setCondCodeAction(ISD::SETLT, VT, Expand);
+ setCondCodeAction(ISD::SETOLE, VT, Expand);
+ setCondCodeAction(ISD::SETLE, VT, Expand);
+ setCondCodeAction(ISD::SETULT, VT, Expand);
+ setCondCodeAction(ISD::SETULE, VT, Expand);
+ setCondCodeAction(ISD::SETUGE, VT, Expand);
+ setCondCodeAction(ISD::SETUGT, VT, Expand);
+ setCondCodeAction(ISD::SETUEQ, VT, Expand);
+ setCondCodeAction(ISD::SETUNE, VT, Expand);
+ }
+
// Lower fixed length vector operations to scalable equivalents.
setOperationAction(ISD::ABS, VT, Custom);
setOperationAction(ISD::ADD, VT, Custom);
@@ -10389,11 +10403,8 @@ static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS,
SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op,
SelectionDAG &DAG) const {
- if (Op.getValueType().isScalableVector()) {
- if (Op.getOperand(0).getValueType().isFloatingPoint())
- return Op;
+ if (Op.getValueType().isScalableVector())
return LowerToPredicatedOp(Op, DAG, AArch64ISD::SETCC_MERGE_ZERO);
- }
if (useSVEForFixedLengthVectorVT(Op.getOperand(0).getValueType()))
return LowerFixedLengthVectorSetccToSVE(Op, DAG);
@@ -17455,10 +17466,6 @@ SDValue AArch64TargetLowering::LowerFixedLengthVectorSetccToSVE(
assert(Op.getValueType() == InVT.changeTypeToInteger() &&
"Expected integer result of the same bit length as the inputs!");
- // Expand floating point vector comparisons.
- if (InVT.isFloatingPoint())
- return SDValue();
-
auto Op1 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(0));
auto Op2 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(1));
auto Pg = getPredicateForFixedLengthVector(DAG, DL, InVT);
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 63a53cc0c8f1..d30bfe45335d 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -262,18 +262,6 @@ def index_vector : SDNode<"AArch64ISD::INDEX_VECTOR", SDT_IndexVector, []>;
def reinterpret_cast : SDNode<"AArch64ISD::REINTERPRET_CAST", SDTUnaryOp>;
-def setoge_or_setge : PatFrags<(ops node:$lhs, node:$rhs),
- [(setoge node:$lhs, node:$rhs),
- (setge node:$lhs, node:$rhs)]>;
-def setogt_or_setgt : PatFrags<(ops node:$lhs, node:$rhs),
- [(setogt node:$lhs, node:$rhs),
- (setgt node:$lhs, node:$rhs)]>;
-def setoeq_or_seteq : PatFrags<(ops node:$lhs, node:$rhs),
- [(setoeq node:$lhs, node:$rhs),
- (seteq node:$lhs, node:$rhs)]>;
-def setone_or_setne : PatFrags<(ops node:$lhs, node:$rhs),
- [(setone node:$lhs, node:$rhs),
- (setne node:$lhs, node:$rhs)]>;
def AArch64mul_p_oneuse : PatFrag<(ops node:$pred, node:$src1, node:$src2),
(AArch64mul_p node:$pred, node:$src1, node:$src2), [{
return N->hasOneUse();
@@ -1252,11 +1240,11 @@ let Predicates = [HasSVE] in {
defm CMPLO_PPzZI : sve_int_ucmp_vi<0b10, "cmplo", SETULT, SETUGT>;
defm CMPLS_PPzZI : sve_int_ucmp_vi<0b11, "cmpls", SETULE, SETUGE>;
- defm FCMGE_PPzZZ : sve_fp_3op_p_pd_cc<0b000, "fcmge", int_aarch64_sve_fcmpge, setoge_or_setge>;
- defm FCMGT_PPzZZ : sve_fp_3op_p_pd_cc<0b001, "fcmgt", int_aarch64_sve_fcmpgt, setogt_or_setgt>;
- defm FCMEQ_PPzZZ : sve_fp_3op_p_pd_cc<0b010, "fcmeq", int_aarch64_sve_fcmpeq, setoeq_or_seteq>;
- defm FCMNE_PPzZZ : sve_fp_3op_p_pd_cc<0b011, "fcmne", int_aarch64_sve_fcmpne, setone_or_setne>;
- defm FCMUO_PPzZZ : sve_fp_3op_p_pd_cc<0b100, "fcmuo", int_aarch64_sve_fcmpuo, setuo>;
+ defm FCMGE_PPzZZ : sve_fp_3op_p_pd_cc<0b000, "fcmge", int_aarch64_sve_fcmpge, SETOGE, SETGE, SETOLE, SETLE>;
+ defm FCMGT_PPzZZ : sve_fp_3op_p_pd_cc<0b001, "fcmgt", int_aarch64_sve_fcmpgt, SETOGT, SETGT, SETOLT, SETLT>;
+ defm FCMEQ_PPzZZ : sve_fp_3op_p_pd_cc<0b010, "fcmeq", int_aarch64_sve_fcmpeq, SETOEQ, SETEQ, SETOEQ, SETEQ>;
+ defm FCMNE_PPzZZ : sve_fp_3op_p_pd_cc<0b011, "fcmne", int_aarch64_sve_fcmpne, SETONE, SETNE, SETONE, SETNE>;
+ defm FCMUO_PPzZZ : sve_fp_3op_p_pd_cc<0b100, "fcmuo", int_aarch64_sve_fcmpuo, SETUO, SETUO, SETUO, SETUO>;
defm FACGE_PPzZZ : sve_fp_3op_p_pd<0b101, "facge", int_aarch64_sve_facge>;
defm FACGT_PPzZZ : sve_fp_3op_p_pd<0b111, "facgt", int_aarch64_sve_facgt>;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 178c83b98599..ebeeea639c9c 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -4685,20 +4685,22 @@ multiclass sve_fp_3op_p_pd<bits<3> opc, string asm, SDPatternOperator op> {
}
multiclass sve_fp_3op_p_pd_cc<bits<3> opc, string asm, SDPatternOperator op,
- SDPatternOperator op_nopred>
+ CondCode cc1, CondCode cc2,
+ CondCode invcc1, CondCode invcc2>
: sve_fp_3op_p_pd<opc, asm, op> {
- def : SVE_2_Op_AllActive_Pat<nxv8i1, op_nopred, nxv8f16, nxv8f16,
- !cast<Instruction>(NAME # _H), PTRUE_H>;
- def : SVE_2_Op_AllActive_Pat<nxv4i1, op_nopred, nxv4f16, nxv4f16,
- !cast<Instruction>(NAME # _H), PTRUE_S>;
- def : SVE_2_Op_AllActive_Pat<nxv2i1, op_nopred, nxv2f16, nxv2f16,
- !cast<Instruction>(NAME # _H), PTRUE_D>;
- def : SVE_2_Op_AllActive_Pat<nxv4i1, op_nopred, nxv4f32, nxv4f32,
- !cast<Instruction>(NAME # _S), PTRUE_S>;
- def : SVE_2_Op_AllActive_Pat<nxv2i1, op_nopred, nxv2f32, nxv2f32,
- !cast<Instruction>(NAME # _S), PTRUE_D>;
- def : SVE_2_Op_AllActive_Pat<nxv2i1, op_nopred, nxv2f64, nxv2f64,
- !cast<Instruction>(NAME # _D), PTRUE_D>;
+ defm : SVE_SETCC_Pat<cc1, invcc1, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
+ defm : SVE_SETCC_Pat<cc1, invcc1, nxv4i1, nxv4f16, !cast<Instruction>(NAME # _H)>;
+ defm : SVE_SETCC_Pat<cc1, invcc1, nxv2i1, nxv2f16, !cast<Instruction>(NAME # _H)>;
+ defm : SVE_SETCC_Pat<cc1, invcc1, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
+ defm : SVE_SETCC_Pat<cc1, invcc1, nxv2i1, nxv2f32, !cast<Instruction>(NAME # _S)>;
+ defm : SVE_SETCC_Pat<cc1, invcc1, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
+
+ defm : SVE_SETCC_Pat<cc2, invcc2, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
+ defm : SVE_SETCC_Pat<cc2, invcc2, nxv4i1, nxv4f16, !cast<Instruction>(NAME # _H)>;
+ defm : SVE_SETCC_Pat<cc2, invcc2, nxv2i1, nxv2f16, !cast<Instruction>(NAME # _H)>;
+ defm : SVE_SETCC_Pat<cc2, invcc2, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
+ defm : SVE_SETCC_Pat<cc2, invcc2, nxv2i1, nxv2f32, !cast<Instruction>(NAME # _S)>;
+ defm : SVE_SETCC_Pat<cc2, invcc2, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-float-compares.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-float-compares.ll
new file mode 100644
index 000000000000..baf01768d55c
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-float-compares.ll
@@ -0,0 +1,762 @@
+; RUN: llc -aarch64-sve-vector-bits-min=128 -asm-verbose=0 < %s | FileCheck %s -check-prefix=NO_SVE
+; RUN: llc -aarch64-sve-vector-bits-min=256 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_EQ_256
+; RUN: llc -aarch64-sve-vector-bits-min=384 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK
+; RUN: llc -aarch64-sve-vector-bits-min=512 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
+; RUN: llc -aarch64-sve-vector-bits-min=640 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
+; RUN: llc -aarch64-sve-vector-bits-min=768 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
+; RUN: llc -aarch64-sve-vector-bits-min=896 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
+; RUN: llc -aarch64-sve-vector-bits-min=1024 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
+; RUN: llc -aarch64-sve-vector-bits-min=1152 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
+; RUN: llc -aarch64-sve-vector-bits-min=1280 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
+; RUN: llc -aarch64-sve-vector-bits-min=1408 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
+; RUN: llc -aarch64-sve-vector-bits-min=1536 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
+; RUN: llc -aarch64-sve-vector-bits-min=1664 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
+; RUN: llc -aarch64-sve-vector-bits-min=1792 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
+; RUN: llc -aarch64-sve-vector-bits-min=1920 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
+; RUN: llc -aarch64-sve-vector-bits-min=2048 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024,VBITS_GE_2048
+
+target triple = "aarch64-unknown-linux-gnu"
+
+; Don't use SVE when its registers are no bigger than NEON.
+; NO_SVE-NOT: z{{[0-9]}}
+
+;
+; FCMP OEQ
+;
+
+; Don't use SVE for 64-bit vectors.
+define <4 x i16> @fcmp_oeq_v4f16(<4 x half> %op1, <4 x half> %op2) #0 {
+; CHECK-LABEL: fcmp_oeq_v4f16:
+; CHECK: fcmeq v0.4h, v0.4h, v1.4h
+; CHECK-NEXT: ret
+ %cmp = fcmp oeq <4 x half> %op1, %op2
+ %sext = sext <4 x i1> %cmp to <4 x i16>
+ ret <4 x i16> %sext
+}
+
+; Don't use SVE for 128-bit vectors.
+define <8 x i16> @fcmp_oeq_v8f16(<8 x half> %op1, <8 x half> %op2) #0 {
+; CHECK-LABEL: fcmp_oeq_v8f16:
+; CHECK: fcmeq v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: ret
+ %cmp = fcmp oeq <8 x half> %op1, %op2
+ %sext = sext <8 x i1> %cmp to <8 x i16>
+ ret <8 x i16> %sext
+}
+
+define void @fcmp_oeq_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
+; CHECK-LABEL: fcmp_oeq_v16f16:
+; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
+; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
+; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
+; CHECK-NEXT: fcmeq [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
+; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
+; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
+; CHECK-NEXT: ret
+ %op1 = load <16 x half>, <16 x half>* %a
+ %op2 = load <16 x half>, <16 x half>* %b
+ %cmp = fcmp oeq <16 x half> %op1, %op2
+ %sext = sext <16 x i1> %cmp to <16 x i16>
+ store <16 x i16> %sext, <16 x i16>* %c
+ ret void
+}
+
+define void @fcmp_oeq_v32f16(<32 x half>* %a, <32 x half>* %b, <32 x i16>* %c) #0 {
+; CHECK-LABEL: fcmp_oeq_v32f16:
+; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32
+; VBITS_GE_512-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
+; VBITS_GE_512-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
+; VBITS_GE_512-NEXT: fcmeq [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
+; VBITS_GE_512-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
+; VBITS_GE_512-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
+; VBITS_GE_512-NEXT: ret
+
+; Ensure sensible type legalisation
+; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
+; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
+; VBITS_EQ_256-DAG: add x[[C_HI:[0-9]+]], x2, #32
+; VBITS_EQ_256-DAG: ld1h { [[OP1_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
+; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1h { [[OP2_LO:z[0-9]+]].h }, [[PG]]/z, [x1]
+; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x[[B_HI]]]
+; VBITS_EQ_256-DAG: fcmeq [[CMP_HI:p[0-9]+]].h, [[PG]]/z, [[OP1_HI]].h, [[OP2_HI]].h
+; VBITS_EQ_256-DAG: mov [[SEXT_HI:z[0-9]+]].h, [[CMP_HI]]/z, #-1
+; VBITS_EQ_256-DAG: fcmeq [[CMP_LO:p[0-9]+]].h, [[PG]]/z, [[OP1_LO]].h, [[OP2_LO]].h
+; VBITS_EQ_256-DAG: mov [[SEXT_LO:z[0-9]+]].h, [[CMP_LO]]/z, #-1
+; VBITS_EQ_256-DAG: st1h { [[SEXT_LO]].h }, [[PG]], [x2]
+; VBITS_EQ_256-DAG: st1h { [[SEXT_HI]].h }, [[PG]], [x[[C_HI]]]
+; VBITS_EQ_256-NEXT: ret
+ %op1 = load <32 x half>, <32 x half>* %a
+ %op2 = load <32 x half>, <32 x half>* %b
+ %cmp = fcmp oeq <32 x half> %op1, %op2
+ %sext = sext <32 x i1> %cmp to <32 x i16>
+ store <32 x i16> %sext, <32 x i16>* %c
+ ret void
+}
+
+define void @fcmp_oeq_v64f16(<64 x half>* %a, <64 x half>* %b, <64 x i16>* %c) #0 {
+; CHECK-LABEL: fcmp_oeq_v64f16:
+; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64
+; VBITS_GE_1024-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
+; VBITS_GE_1024-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
+; VBITS_GE_1024-NEXT: fcmeq [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
+; VBITS_GE_1024-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
+; VBITS_GE_1024-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
+; VBITS_GE_1024-NEXT: ret
+ %op1 = load <64 x half>, <64 x half>* %a
+ %op2 = load <64 x half>, <64 x half>* %b
+ %cmp = fcmp oeq <64 x half> %op1, %op2
+ %sext = sext <64 x i1> %cmp to <64 x i16>
+ store <64 x i16> %sext, <64 x i16>* %c
+ ret void
+}
+
+define void @fcmp_oeq_v128f16(<128 x half>* %a, <128 x half>* %b, <128 x i16>* %c) #0 {
+; CHECK-LABEL: fcmp_oeq_v128f16:
+; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].h, vl128
+; VBITS_GE_2048-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
+; VBITS_GE_2048-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
+; VBITS_GE_2048-NEXT: fcmeq [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
+; VBITS_GE_2048-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
+; VBITS_GE_2048-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
+; VBITS_GE_2048-NEXT: ret
+ %op1 = load <128 x half>, <128 x half>* %a
+ %op2 = load <128 x half>, <128 x half>* %b
+ %cmp = fcmp oeq <128 x half> %op1, %op2
+ %sext = sext <128 x i1> %cmp to <128 x i16>
+ store <128 x i16> %sext, <128 x i16>* %c
+ ret void
+}
+
+; Don't use SVE for 64-bit vectors.
+define <2 x i32> @fcmp_oeq_v2f32(<2 x float> %op1, <2 x float> %op2) #0 {
+; CHECK-LABEL: fcmp_oeq_v2f32:
+; CHECK: fcmeq v0.2s, v0.2s, v1.2s
+; CHECK-NEXT: ret
+ %cmp = fcmp oeq <2 x float> %op1, %op2
+ %sext = sext <2 x i1> %cmp to <2 x i32>
+ ret <2 x i32> %sext
+}
+
+; Don't use SVE for 128-bit vectors.
+define <4 x i32> @fcmp_oeq_v4f32(<4 x float> %op1, <4 x float> %op2) #0 {
+; CHECK-LABEL: fcmp_oeq_v4f32:
+; CHECK: fcmeq v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: ret
+ %cmp = fcmp oeq <4 x float> %op1, %op2
+ %sext = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %sext
+}
+
+define void @fcmp_oeq_v8f32(<8 x float>* %a, <8 x float>* %b, <8 x i32>* %c) #0 {
+; CHECK-LABEL: fcmp_oeq_v8f32:
+; CHECK: ptrue [[PG:p[0-9]+]].s, vl8
+; CHECK-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0]
+; CHECK-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1]
+; CHECK-NEXT: fcmeq [[CMP:p[0-9]+]].s, [[PG]]/z, [[OP1]].s, [[OP2]].s
+; CHECK-NEXT: mov [[SEXT:z[0-9]+]].s, [[CMP]]/z, #-1
+; CHECK-NEXT: st1w { [[SEXT]].s }, [[PG]], [x2]
+; CHECK-NEXT: ret
+ %op1 = load <8 x float>, <8 x float>* %a
+ %op2 = load <8 x float>, <8 x float>* %b
+ %cmp = fcmp oeq <8 x float> %op1, %op2
+ %sext = sext <8 x i1> %cmp to <8 x i32>
+ store <8 x i32> %sext, <8 x i32>* %c
+ ret void
+}
+
+define void @fcmp_oeq_v16f32(<16 x float>* %a, <16 x float>* %b, <16 x i32>* %c) #0 {
+; CHECK-LABEL: fcmp_oeq_v16f32:
+; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16
+; VBITS_GE_512-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0]
+; VBITS_GE_512-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1]
+; VBITS_GE_512-NEXT: fcmeq [[CMP:p[0-9]+]].s, [[PG]]/z, [[OP1]].s, [[OP2]].s
+; VBITS_GE_512-NEXT: mov [[SEXT:z[0-9]+]].s, [[CMP]]/z, #-1
+; VBITS_GE_512-NEXT: st1w { [[SEXT]].s }, [[PG]], [x2]
+; VBITS_GE_512-NEXT: ret
+
+; Ensure sensible type legalisation
+; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
+; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
+; VBITS_EQ_256-DAG: add x[[C_HI:[0-9]+]], x2, #32
+; VBITS_EQ_256-DAG: ld1w { [[OP1_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
+; VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1w { [[OP2_LO:z[0-9]+]].s }, [[PG]]/z, [x1]
+; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x[[B_HI]]]
+; VBITS_EQ_256-DAG: fcmeq [[CMP_HI:p[0-9]+]].s, [[PG]]/z, [[OP1_HI]].s, [[OP2_HI]].s
+; VBITS_EQ_256-DAG: mov [[SEXT_HI:z[0-9]+]].s, [[CMP_HI]]/z, #-1
+; VBITS_EQ_256-DAG: fcmeq [[CMP_LO:p[0-9]+]].s, [[PG]]/z, [[OP1_LO]].s, [[OP2_LO]].s
+; VBITS_EQ_256-DAG: mov [[SEXT_LO:z[0-9]+]].s, [[CMP_LO]]/z, #-1
+; VBITS_EQ_256-DAG: st1w { [[SEXT_LO]].s }, [[PG]], [x2]
+; VBITS_EQ_256-DAG: st1w { [[SEXT_HI]].s }, [[PG]], [x[[C_HI]]]
+; VBITS_EQ_256-NEXT: ret
+ %op1 = load <16 x float>, <16 x float>* %a
+ %op2 = load <16 x float>, <16 x float>* %b
+ %cmp = fcmp oeq <16 x float> %op1, %op2
+ %sext = sext <16 x i1> %cmp to <16 x i32>
+ store <16 x i32> %sext, <16 x i32>* %c
+ ret void
+}
+
+define void @fcmp_oeq_v32f32(<32 x float>* %a, <32 x float>* %b, <32 x i32>* %c) #0 {
+; CHECK-LABEL: fcmp_oeq_v32f32:
+; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32
+; VBITS_GE_1024-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0]
+; VBITS_GE_1024-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1]
+; VBITS_GE_1024-NEXT: fcmeq [[CMP:p[0-9]+]].s, [[PG]]/z, [[OP1]].s, [[OP2]].s
+; VBITS_GE_1024-NEXT: mov [[SEXT:z[0-9]+]].s, [[CMP]]/z, #-1
+; VBITS_GE_1024-NEXT: st1w { [[SEXT]].s }, [[PG]], [x2]
+; VBITS_GE_1024-NEXT: ret
+ %op1 = load <32 x float>, <32 x float>* %a
+ %op2 = load <32 x float>, <32 x float>* %b
+ %cmp = fcmp oeq <32 x float> %op1, %op2
+ %sext = sext <32 x i1> %cmp to <32 x i32>
+ store <32 x i32> %sext, <32 x i32>* %c
+ ret void
+}
+
+define void @fcmp_oeq_v64f32(<64 x float>* %a, <64 x float>* %b, <64 x i32>* %c) #0 {
+; CHECK-LABEL: fcmp_oeq_v64f32:
+; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64
+; VBITS_GE_2048-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0]
+; VBITS_GE_2048-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1]
+; VBITS_GE_2048-NEXT: fcmeq [[CMP:p[0-9]+]].s, [[PG]]/z, [[OP1]].s, [[OP2]].s
+; VBITS_GE_2048-NEXT: mov [[SEXT:z[0-9]+]].s, [[CMP]]/z, #-1
+; VBITS_GE_2048-NEXT: st1w { [[SEXT]].s }, [[PG]], [x2]
+; VBITS_GE_2048-NEXT: ret
+ %op1 = load <64 x float>, <64 x float>* %a
+ %op2 = load <64 x float>, <64 x float>* %b
+ %cmp = fcmp oeq <64 x float> %op1, %op2
+ %sext = sext <64 x i1> %cmp to <64 x i32>
+ store <64 x i32> %sext, <64 x i32>* %c
+ ret void
+}
+
+; Don't use SVE for 64-bit vectors.
+define <1 x i64> @fcmp_oeq_v1f64(<1 x double> %op1, <1 x double> %op2) #0 {
+; CHECK-LABEL: fcmp_oeq_v1f64:
+; CHECK: fcmeq d0, d0, d1
+; CHECK-NEXT: ret
+ %cmp = fcmp oeq <1 x double> %op1, %op2
+ %sext = sext <1 x i1> %cmp to <1 x i64>
+ ret <1 x i64> %sext
+}
+
+; Don't use SVE for 128-bit vectors.
+define <2 x i64> @fcmp_oeq_v2f64(<2 x double> %op1, <2 x double> %op2) #0 {
+; CHECK-LABEL: fcmp_oeq_v2f64:
+; CHECK: fcmeq v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: ret
+ %cmp = fcmp oeq <2 x double> %op1, %op2
+ %sext = sext <2 x i1> %cmp to <2 x i64>
+ ret <2 x i64> %sext
+}
+
+define void @fcmp_oeq_v4f64(<4 x double>* %a, <4 x double>* %b, <4 x i64>* %c) #0 {
+; CHECK-LABEL: fcmp_oeq_v4f64:
+; CHECK: ptrue [[PG:p[0-9]+]].d, vl4
+; CHECK-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0]
+; CHECK-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1]
+; CHECK-NEXT: fcmeq [[CMP:p[0-9]+]].d, [[PG]]/z, [[OP1]].d, [[OP2]].d
+; CHECK-NEXT: mov [[SEXT:z[0-9]+]].d, [[CMP]]/z, #-1
+; CHECK-NEXT: st1d { [[SEXT]].d }, [[PG]], [x2]
+; CHECK-NEXT: ret
+ %op1 = load <4 x double>, <4 x double>* %a
+ %op2 = load <4 x double>, <4 x double>* %b
+ %cmp = fcmp oeq <4 x double> %op1, %op2
+ %sext = sext <4 x i1> %cmp to <4 x i64>
+ store <4 x i64> %sext, <4 x i64>* %c
+ ret void
+}
+
+define void @fcmp_oeq_v8f64(<8 x double>* %a, <8 x double>* %b, <8 x i64>* %c) #0 {
+; CHECK-LABEL: fcmp_oeq_v8f64:
+; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8
+; VBITS_GE_512-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0]
+; VBITS_GE_512-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1]
+; VBITS_GE_512-NEXT: fcmeq [[CMP:p[0-9]+]].d, [[PG]]/z, [[OP1]].d, [[OP2]].d
+; VBITS_GE_512-NEXT: mov [[SEXT:z[0-9]+]].d, [[CMP]]/z, #-1
+; VBITS_GE_512-NEXT: st1d { [[SEXT]].d }, [[PG]], [x2]
+; VBITS_GE_512-NEXT: ret
+
+; Ensure sensible type legalisation
+; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
+; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32
+; VBITS_EQ_256-DAG: add x[[C_HI:[0-9]+]], x2, #32
+; VBITS_EQ_256-DAG: ld1d { [[OP1_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
+; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: ld1d { [[OP2_LO:z[0-9]+]].d }, [[PG]]/z, [x1]
+; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x[[B_HI]]]
+; VBITS_EQ_256-DAG: fcmeq [[CMP_HI:p[0-9]+]].d, [[PG]]/z, [[OP1_HI]].d, [[OP2_HI]].d
+; VBITS_EQ_256-DAG: mov [[SEXT_HI:z[0-9]+]].d, [[CMP_HI]]/z, #-1
+; VBITS_EQ_256-DAG: fcmeq [[CMP_LO:p[0-9]+]].d, [[PG]]/z, [[OP1_LO]].d, [[OP2_LO]].d
+; VBITS_EQ_256-DAG: mov [[SEXT_LO:z[0-9]+]].d, [[CMP_LO]]/z, #-1
+; VBITS_EQ_256-DAG: st1d { [[SEXT_LO]].d }, [[PG]], [x2]
+; VBITS_EQ_256-DAG: st1d { [[SEXT_HI]].d }, [[PG]], [x[[C_HI]]]
+; VBITS_EQ_256-NEXT: ret
+ %op1 = load <8 x double>, <8 x double>* %a
+ %op2 = load <8 x double>, <8 x double>* %b
+ %cmp = fcmp oeq <8 x double> %op1, %op2
+ %sext = sext <8 x i1> %cmp to <8 x i64>
+ store <8 x i64> %sext, <8 x i64>* %c
+ ret void
+}
+
+define void @fcmp_oeq_v16f64(<16 x double>* %a, <16 x double>* %b, <16 x i64>* %c) #0 {
+; CHECK-LABEL: fcmp_oeq_v16f64:
+; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16
+; VBITS_GE_1024-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0]
+; VBITS_GE_1024-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1]
+; VBITS_GE_1024-NEXT: fcmeq [[CMP:p[0-9]+]].d, [[PG]]/z, [[OP1]].d, [[OP2]].d
+; VBITS_GE_1024-NEXT: mov [[SEXT:z[0-9]+]].d, [[CMP]]/z, #-1
+; VBITS_GE_1024-NEXT: st1d { [[SEXT]].d }, [[PG]], [x2]
+; VBITS_GE_1024-NEXT: ret
+ %op1 = load <16 x double>, <16 x double>* %a
+ %op2 = load <16 x double>, <16 x double>* %b
+ %cmp = fcmp oeq <16 x double> %op1, %op2
+ %sext = sext <16 x i1> %cmp to <16 x i64>
+ store <16 x i64> %sext, <16 x i64>* %c
+ ret void
+}
+
+define void @fcmp_oeq_v32f64(<32 x double>* %a, <32 x double>* %b, <32 x i64>* %c) #0 {
+; CHECK-LABEL: fcmp_oeq_v32f64:
+; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32
+; VBITS_GE_2048-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0]
+; VBITS_GE_2048-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1]
+; VBITS_GE_2048-NEXT: fcmeq [[CMP:p[0-9]+]].d, [[PG]]/z, [[OP1]].d, [[OP2]].d
+; VBITS_GE_2048-NEXT: mov [[SEXT:z[0-9]+]].d, [[CMP]]/z, #-1
+; VBITS_GE_2048-NEXT: st1d { [[SEXT]].d }, [[PG]], [x2]
+; VBITS_GE_2048-NEXT: ret
+ %op1 = load <32 x double>, <32 x double>* %a
+ %op2 = load <32 x double>, <32 x double>* %b
+ %cmp = fcmp oeq <32 x double> %op1, %op2
+ %sext = sext <32 x i1> %cmp to <32 x i64>
+ store <32 x i64> %sext, <32 x i64>* %c
+ ret void
+}
+
+;
+; FCMP UEQ
+;
+
+define void @fcmp_ueq_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
+; CHECK-LABEL: fcmp_ueq_v16f16:
+; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
+; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
+; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
+; CHECK-NEXT: mov [[INV:w[0-9]+]], #65535
+; CHECK-NEXT: fcmne [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
+; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
+; CHECK-NEXT: mov [[TMP:z[0-9]+]].h, [[INV]]
+; CHECK-NEXT: eor [[SEXT]].d, [[SEXT]].d, [[TMP]].d
+; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
+; CHECK-NEXT: ret
+ %op1 = load <16 x half>, <16 x half>* %a
+ %op2 = load <16 x half>, <16 x half>* %b
+ %cmp = fcmp ueq <16 x half> %op1, %op2
+ %sext = sext <16 x i1> %cmp to <16 x i16>
+ store <16 x i16> %sext, <16 x i16>* %c
+ ret void
+}
+
+;
+; FCMP ONE
+;
+
+define void @fcmp_one_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
+; CHECK-LABEL: fcmp_one_v16f16:
+; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
+; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
+; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
+; CHECK-NEXT: fcmne [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
+; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
+; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
+; CHECK-NEXT: ret
+ %op1 = load <16 x half>, <16 x half>* %a
+ %op2 = load <16 x half>, <16 x half>* %b
+ %cmp = fcmp one <16 x half> %op1, %op2
+ %sext = sext <16 x i1> %cmp to <16 x i16>
+ store <16 x i16> %sext, <16 x i16>* %c
+ ret void
+}
+
+;
+; FCMP UNE
+;
+
+define void @fcmp_une_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
+; CHECK-LABEL: fcmp_une_v16f16:
+; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
+; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
+; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
+; CHECK-NEXT: mov [[INV:w[0-9]+]], #65535
+; CHECK-NEXT: fcmeq [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
+; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
+; CHECK-NEXT: mov [[TMP:z[0-9]+]].h, [[INV]]
+; CHECK-NEXT: eor [[SEXT]].d, [[SEXT]].d, [[TMP]].d
+; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
+; CHECK-NEXT: ret
+ %op1 = load <16 x half>, <16 x half>* %a
+ %op2 = load <16 x half>, <16 x half>* %b
+ %cmp = fcmp une <16 x half> %op1, %op2
+ %sext = sext <16 x i1> %cmp to <16 x i16>
+ store <16 x i16> %sext, <16 x i16>* %c
+ ret void
+}
+
+;
+; FCMP OGT
+;
+
+define void @fcmp_ogt_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
+; CHECK-LABEL: fcmp_ogt_v16f16:
+; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
+; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
+; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
+; CHECK-NEXT: fcmgt [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
+; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
+; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
+; CHECK-NEXT: ret
+ %op1 = load <16 x half>, <16 x half>* %a
+ %op2 = load <16 x half>, <16 x half>* %b
+ %cmp = fcmp ogt <16 x half> %op1, %op2
+ %sext = sext <16 x i1> %cmp to <16 x i16>
+ store <16 x i16> %sext, <16 x i16>* %c
+ ret void
+}
+
+;
+; FCMP UGT
+;
+
+define void @fcmp_ugt_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
+; CHECK-LABEL: fcmp_ugt_v16f16:
+; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
+; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
+; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
+; CHECK-NEXT: mov [[INV:w[0-9]+]], #65535
+; CHECK-NEXT: fcmge [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP2]].h, [[OP1]].h
+; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
+; CHECK-NEXT: mov [[TMP:z[0-9]+]].h, [[INV]]
+; CHECK-NEXT: eor [[SEXT]].d, [[SEXT]].d, [[TMP]].d
+; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
+; CHECK-NEXT: ret
+ %op1 = load <16 x half>, <16 x half>* %a
+ %op2 = load <16 x half>, <16 x half>* %b
+ %cmp = fcmp ugt <16 x half> %op1, %op2
+ %sext = sext <16 x i1> %cmp to <16 x i16>
+ store <16 x i16> %sext, <16 x i16>* %c
+ ret void
+}
+
+;
+; FCMP OLT
+;
+
+define void @fcmp_olt_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
+; CHECK-LABEL: fcmp_olt_v16f16:
+; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
+; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
+; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
+; CHECK-NEXT: fcmgt [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP2]].h, [[OP1]].h
+; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
+; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
+; CHECK-NEXT: ret
+ %op1 = load <16 x half>, <16 x half>* %a
+ %op2 = load <16 x half>, <16 x half>* %b
+ %cmp = fcmp olt <16 x half> %op1, %op2
+ %sext = sext <16 x i1> %cmp to <16 x i16>
+ store <16 x i16> %sext, <16 x i16>* %c
+ ret void
+}
+
+;
+; FCMP ULT
+;
+
+define void @fcmp_ult_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
+; CHECK-LABEL: fcmp_ult_v16f16:
+; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
+; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
+; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
+; CHECK-NEXT: mov [[INV:w[0-9]+]], #65535
+; CHECK-NEXT: fcmge [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
+; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
+; CHECK-NEXT: mov [[TMP:z[0-9]+]].h, [[INV]]
+; CHECK-NEXT: eor [[SEXT]].d, [[SEXT]].d, [[TMP]].d
+; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
+; CHECK-NEXT: ret
+ %op1 = load <16 x half>, <16 x half>* %a
+ %op2 = load <16 x half>, <16 x half>* %b
+ %cmp = fcmp ult <16 x half> %op1, %op2
+ %sext = sext <16 x i1> %cmp to <16 x i16>
+ store <16 x i16> %sext, <16 x i16>* %c
+ ret void
+}
+
+;
+; FCMP OGE
+;
+
+define void @fcmp_oge_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
+; CHECK-LABEL: fcmp_oge_v16f16:
+; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
+; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
+; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
+; CHECK-NEXT: fcmge [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
+; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
+; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
+; CHECK-NEXT: ret
+ %op1 = load <16 x half>, <16 x half>* %a
+ %op2 = load <16 x half>, <16 x half>* %b
+ %cmp = fcmp oge <16 x half> %op1, %op2
+ %sext = sext <16 x i1> %cmp to <16 x i16>
+ store <16 x i16> %sext, <16 x i16>* %c
+ ret void
+}
+
+;
+; FCMP UGE
+;
+
+define void @fcmp_uge_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
+; CHECK-LABEL: fcmp_uge_v16f16:
+; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
+; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
+; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
+; CHECK-NEXT: mov [[INV:w[0-9]+]], #65535
+; CHECK-NEXT: fcmgt [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP2]].h, [[OP1]].h
+; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
+; CHECK-NEXT: mov [[TMP:z[0-9]+]].h, [[INV]]
+; CHECK-NEXT: eor [[SEXT]].d, [[SEXT]].d, [[TMP]].d
+; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
+; CHECK-NEXT: ret
+ %op1 = load <16 x half>, <16 x half>* %a
+ %op2 = load <16 x half>, <16 x half>* %b
+ %cmp = fcmp uge <16 x half> %op1, %op2
+ %sext = sext <16 x i1> %cmp to <16 x i16>
+ store <16 x i16> %sext, <16 x i16>* %c
+ ret void
+}
+
+;
+; FCMP OLE
+;
+
+define void @fcmp_ole_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
+; CHECK-LABEL: fcmp_ole_v16f16:
+; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
+; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
+; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
+; CHECK-NEXT: fcmge [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP2]].h, [[OP1]].h
+; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
+; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
+; CHECK-NEXT: ret
+ %op1 = load <16 x half>, <16 x half>* %a
+ %op2 = load <16 x half>, <16 x half>* %b
+ %cmp = fcmp ole <16 x half> %op1, %op2
+ %sext = sext <16 x i1> %cmp to <16 x i16>
+ store <16 x i16> %sext, <16 x i16>* %c
+ ret void
+}
+
+;
+; FCMP ULE
+;
+
+define void @fcmp_ule_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
+; CHECK-LABEL: fcmp_ule_v16f16:
+; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
+; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
+; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
+; CHECK-NEXT: mov [[INV:w[0-9]+]], #65535
+; CHECK-NEXT: fcmgt [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
+; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
+; CHECK-NEXT: mov [[TMP:z[0-9]+]].h, [[INV]]
+; CHECK-NEXT: eor [[SEXT]].d, [[SEXT]].d, [[TMP]].d
+; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
+; CHECK-NEXT: ret
+ %op1 = load <16 x half>, <16 x half>* %a
+ %op2 = load <16 x half>, <16 x half>* %b
+ %cmp = fcmp ule <16 x half> %op1, %op2
+ %sext = sext <16 x i1> %cmp to <16 x i16>
+ store <16 x i16> %sext, <16 x i16>* %c
+ ret void
+}
+
+;
+; FCMP UNO
+;
+
+define void @fcmp_uno_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
+; CHECK-LABEL: fcmp_uno_v16f16:
+; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
+; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
+; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
+; CHECK-NEXT: fcmuo [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
+; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
+; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
+; CHECK-NEXT: ret
+ %op1 = load <16 x half>, <16 x half>* %a
+ %op2 = load <16 x half>, <16 x half>* %b
+ %cmp = fcmp uno <16 x half> %op1, %op2
+ %sext = sext <16 x i1> %cmp to <16 x i16>
+ store <16 x i16> %sext, <16 x i16>* %c
+ ret void
+}
+
+;
+; FCMP ORD
+;
+
+define void @fcmp_ord_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
+; CHECK-LABEL: fcmp_ord_v16f16:
+; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
+; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
+; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
+; CHECK-NEXT: mov [[INV:w[0-9]+]], #65535
+; CHECK-NEXT: fcmuo [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
+; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
+; CHECK-NEXT: mov [[TMP:z[0-9]+]].h, [[INV]]
+; CHECK-NEXT: eor [[SEXT]].d, [[SEXT]].d, [[TMP]].d
+; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
+; CHECK-NEXT: ret
+ %op1 = load <16 x half>, <16 x half>* %a
+ %op2 = load <16 x half>, <16 x half>* %b
+ %cmp = fcmp ord <16 x half> %op1, %op2
+ %sext = sext <16 x i1> %cmp to <16 x i16>
+ store <16 x i16> %sext, <16 x i16>* %c
+ ret void
+}
+
+;
+; FCMP EQ
+;
+
+define void @fcmp_eq_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
+; CHECK-LABEL: fcmp_eq_v16f16:
+; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
+; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
+; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
+; CHECK-NEXT: fcmeq [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
+; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
+; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
+; CHECK-NEXT: ret
+ %op1 = load <16 x half>, <16 x half>* %a
+ %op2 = load <16 x half>, <16 x half>* %b
+ %cmp = fcmp fast oeq <16 x half> %op1, %op2
+ %sext = sext <16 x i1> %cmp to <16 x i16>
+ store <16 x i16> %sext, <16 x i16>* %c
+ ret void
+}
+
+;
+; FCMP NE
+;
+
+define void @fcmp_ne_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
+; CHECK-LABEL: fcmp_ne_v16f16:
+; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
+; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
+; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
+; CHECK-NEXT: fcmne [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
+; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
+; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
+; CHECK-NEXT: ret
+ %op1 = load <16 x half>, <16 x half>* %a
+ %op2 = load <16 x half>, <16 x half>* %b
+ %cmp = fcmp fast one <16 x half> %op1, %op2
+ %sext = sext <16 x i1> %cmp to <16 x i16>
+ store <16 x i16> %sext, <16 x i16>* %c
+ ret void
+}
+
+;
+; FCMP GT
+;
+
+define void @fcmp_gt_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
+; CHECK-LABEL: fcmp_gt_v16f16:
+; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
+; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
+; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
+; CHECK-NEXT: fcmgt [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
+; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
+; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
+; CHECK-NEXT: ret
+ %op1 = load <16 x half>, <16 x half>* %a
+ %op2 = load <16 x half>, <16 x half>* %b
+ %cmp = fcmp fast ogt <16 x half> %op1, %op2
+ %sext = sext <16 x i1> %cmp to <16 x i16>
+ store <16 x i16> %sext, <16 x i16>* %c
+ ret void
+}
+
+;
+; FCMP LT
+;
+
+define void @fcmp_lt_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
+; CHECK-LABEL: fcmp_lt_v16f16:
+; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
+; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
+; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
+; CHECK-NEXT: fcmgt [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP2]].h, [[OP1]].h
+; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
+; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
+; CHECK-NEXT: ret
+ %op1 = load <16 x half>, <16 x half>* %a
+ %op2 = load <16 x half>, <16 x half>* %b
+ %cmp = fcmp fast olt <16 x half> %op1, %op2
+ %sext = sext <16 x i1> %cmp to <16 x i16>
+ store <16 x i16> %sext, <16 x i16>* %c
+ ret void
+}
+
+;
+; FCMP GE
+;
+
+define void @fcmp_ge_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
+; CHECK-LABEL: fcmp_ge_v16f16:
+; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
+; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
+; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
+; CHECK-NEXT: fcmge [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP1]].h, [[OP2]].h
+; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
+; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
+; CHECK-NEXT: ret
+ %op1 = load <16 x half>, <16 x half>* %a
+ %op2 = load <16 x half>, <16 x half>* %b
+ %cmp = fcmp fast oge <16 x half> %op1, %op2
+ %sext = sext <16 x i1> %cmp to <16 x i16>
+ store <16 x i16> %sext, <16 x i16>* %c
+ ret void
+}
+
+;
+; FCMP LE
+;
+
+define void @fcmp_le_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x i16>* %c) #0 {
+; CHECK-LABEL: fcmp_le_v16f16:
+; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
+; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
+; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1]
+; CHECK-NEXT: fcmge [[CMP:p[0-9]+]].h, [[PG]]/z, [[OP2]].h, [[OP1]].h
+; CHECK-NEXT: mov [[SEXT:z[0-9]+]].h, [[CMP]]/z, #-1
+; CHECK-NEXT: st1h { [[SEXT]].h }, [[PG]], [x2]
+; CHECK-NEXT: ret
+ %op1 = load <16 x half>, <16 x half>* %a
+ %op2 = load <16 x half>, <16 x half>* %b
+ %cmp = fcmp fast ole <16 x half> %op1, %op2
+ %sext = sext <16 x i1> %cmp to <16 x i16>
+ store <16 x i16> %sext, <16 x i16>* %c
+ ret void
+}
+
+attributes #0 = { "target-features"="+sve" }
diff --git a/llvm/test/CodeGen/PowerPC/vsx.ll b/llvm/test/CodeGen/PowerPC/vsx.ll
index 73cca71b539c..43bb0469cc91 100644
--- a/llvm/test/CodeGen/PowerPC/vsx.ll
+++ b/llvm/test/CodeGen/PowerPC/vsx.ll
@@ -626,32 +626,32 @@ define <4 x float> @test22(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvcmpgtsp vs0, v5, v4
; CHECK-NEXT: xvcmpgtsp vs1, v4, v5
-; CHECK-NEXT: xxlnor vs0, vs1, vs0
-; CHECK-NEXT: xxsel v2, v3, v2, vs0
+; CHECK-NEXT: xxlor vs0, vs1, vs0
+; CHECK-NEXT: xxsel v2, v2, v3, vs0
; CHECK-NEXT: blr
;
; CHECK-REG-LABEL: test22:
; CHECK-REG: # %bb.0: # %entry
; CHECK-REG-NEXT: xvcmpgtsp vs0, v5, v4
; CHECK-REG-NEXT: xvcmpgtsp vs1, v4, v5
-; CHECK-REG-NEXT: xxlnor vs0, vs1, vs0
-; CHECK-REG-NEXT: xxsel v2, v3, v2, vs0
+; CHECK-REG-NEXT: xxlor vs0, vs1, vs0
+; CHECK-REG-NEXT: xxsel v2, v2, v3, vs0
; CHECK-REG-NEXT: blr
;
; CHECK-FISL-LABEL: test22:
; CHECK-FISL: # %bb.0: # %entry
; CHECK-FISL-NEXT: xvcmpgtsp vs1, v5, v4
; CHECK-FISL-NEXT: xvcmpgtsp vs0, v4, v5
-; CHECK-FISL-NEXT: xxlnor vs0, vs0, vs1
-; CHECK-FISL-NEXT: xxsel v2, v3, v2, vs0
+; CHECK-FISL-NEXT: xxlor vs0, vs0, vs1
+; CHECK-FISL-NEXT: xxsel v2, v2, v3, vs0
; CHECK-FISL-NEXT: blr
;
; CHECK-LE-LABEL: test22:
; CHECK-LE: # %bb.0: # %entry
; CHECK-LE-NEXT: xvcmpgtsp vs0, v5, v4
; CHECK-LE-NEXT: xvcmpgtsp vs1, v4, v5
-; CHECK-LE-NEXT: xxlnor vs0, vs1, vs0
-; CHECK-LE-NEXT: xxsel v2, v3, v2, vs0
+; CHECK-LE-NEXT: xxlor vs0, vs1, vs0
+; CHECK-LE-NEXT: xxsel v2, v2, v3, vs0
; CHECK-LE-NEXT: blr
entry:
%m = fcmp ueq <4 x float> %c, %d
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-setcc.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-setcc.ll
index 8c880c236bb7..6c14ed5f738f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-setcc.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-setcc.ll
@@ -285,9 +285,8 @@ define void @fcmp_ult_vv_v8f64(<8 x double>* %x, <8 x double>* %y, <8 x i1>* %z)
; CHECK-NEXT: vle64.v v28, (a0)
; CHECK-NEXT: vle64.v v8, (a1)
; CHECK-NEXT: vmfle.vv v25, v8, v28
-; CHECK-NEXT: vsetivli a0, 8, e8,mf2,ta,mu
-; CHECK-NEXT: vmnand.mm v25, v25, v25
; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmnand.mm v25, v25, v25
; CHECK-NEXT: vse1.v v25, (a2)
; CHECK-NEXT: ret
%a = load <8 x double>, <8 x double>* %x
@@ -358,8 +357,8 @@ define void @fcmp_ueq_vv_v32f32(<32 x float>* %x, <32 x float>* %y, <32 x i1>* %
; CHECK-NEXT: vsetvli a4, a3, e32,m8,ta,mu
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vle32.v v16, (a1)
-; CHECK-NEXT: vmflt.vv v25, v16, v8
-; CHECK-NEXT: vmflt.vv v26, v8, v16
+; CHECK-NEXT: vmflt.vv v25, v8, v16
+; CHECK-NEXT: vmflt.vv v26, v16, v8
; CHECK-NEXT: vsetvli a0, a3, e8,m2,ta,mu
; CHECK-NEXT: vmnor.mm v25, v26, v25
; CHECK-NEXT: vse1.v v25, (a2)
@@ -395,8 +394,8 @@ define void @fcmp_one_vv_v8f64(<16 x double>* %x, <16 x double>* %y, <16 x i1>*
; CHECK-NEXT: vsetivli a3, 16, e64,m8,ta,mu
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vle64.v v16, (a1)
-; CHECK-NEXT: vmflt.vv v25, v16, v8
-; CHECK-NEXT: vmflt.vv v26, v8, v16
+; CHECK-NEXT: vmflt.vv v25, v8, v16
+; CHECK-NEXT: vmflt.vv v26, v16, v8
; CHECK-NEXT: vsetivli a0, 16, e8,m1,ta,mu
; CHECK-NEXT: vmor.mm v25, v26, v25
; CHECK-NEXT: vse1.v v25, (a2)
@@ -433,9 +432,8 @@ define void @fcmp_ord_vv_v4f16(<4 x half>* %x, <4 x half>* %y, <4 x i1>* %z) {
; CHECK-NEXT: vle16.v v26, (a0)
; CHECK-NEXT: vmfeq.vv v27, v25, v25
; CHECK-NEXT: vmfeq.vv v25, v26, v26
-; CHECK-NEXT: vsetivli a0, 4, e8,mf2,ta,mu
-; CHECK-NEXT: vmand.mm v25, v25, v27
; CHECK-NEXT: vsetivli a0, 4, e8,m1,ta,mu
+; CHECK-NEXT: vmand.mm v25, v25, v27
; CHECK-NEXT: vse1.v v25, (a2)
; CHECK-NEXT: ret
%a = load <4 x half>, <4 x half>* %x
@@ -453,9 +451,8 @@ define void @fcmp_uno_vv_v4f16(<2 x half>* %x, <2 x half>* %y, <2 x i1>* %z) {
; CHECK-NEXT: vle16.v v26, (a0)
; CHECK-NEXT: vmfne.vv v27, v25, v25
; CHECK-NEXT: vmfne.vv v25, v26, v26
-; CHECK-NEXT: vsetivli a0, 2, e8,mf2,ta,mu
-; CHECK-NEXT: vmor.mm v25, v25, v27
; CHECK-NEXT: vsetivli a0, 2, e8,m1,ta,mu
+; CHECK-NEXT: vmor.mm v25, v25, v27
; CHECK-NEXT: vse1.v v25, (a2)
; CHECK-NEXT: ret
%a = load <2 x half>, <2 x half>* %x
@@ -747,9 +744,8 @@ define void @fcmp_ult_vf_v8f64(<8 x double>* %x, double %y, <8 x i1>* %z) {
; CHECK-NEXT: vsetivli a2, 8, e64,m4,ta,mu
; CHECK-NEXT: vle64.v v28, (a0)
; CHECK-NEXT: vmfge.vf v25, v28, fa0
-; CHECK-NEXT: vsetivli a0, 8, e8,mf2,ta,mu
-; CHECK-NEXT: vmnand.mm v25, v25, v25
; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmnand.mm v25, v25, v25
; CHECK-NEXT: vse1.v v25, (a1)
; CHECK-NEXT: ret
%a = load <8 x double>, <8 x double>* %x
@@ -820,8 +816,8 @@ define void @fcmp_ueq_vf_v32f32(<32 x float>* %x, float %y, <32 x i1>* %z) {
; CHECK-NEXT: addi a2, zero, 32
; CHECK-NEXT: vsetvli a3, a2, e32,m8,ta,mu
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vmfgt.vf v25, v8, fa0
-; CHECK-NEXT: vmflt.vf v26, v8, fa0
+; CHECK-NEXT: vmflt.vf v25, v8, fa0
+; CHECK-NEXT: vmfgt.vf v26, v8, fa0
; CHECK-NEXT: vsetvli a0, a2, e8,m2,ta,mu
; CHECK-NEXT: vmnor.mm v25, v26, v25
; CHECK-NEXT: vse1.v v25, (a1)
@@ -857,8 +853,8 @@ define void @fcmp_one_vf_v8f64(<16 x double>* %x, double %y, <16 x i1>* %z) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli a2, 16, e64,m8,ta,mu
; CHECK-NEXT: vle64.v v8, (a0)
-; CHECK-NEXT: vmfgt.vf v25, v8, fa0
-; CHECK-NEXT: vmflt.vf v26, v8, fa0
+; CHECK-NEXT: vmflt.vf v25, v8, fa0
+; CHECK-NEXT: vmfgt.vf v26, v8, fa0
; CHECK-NEXT: vsetivli a0, 16, e8,m1,ta,mu
; CHECK-NEXT: vmor.mm v25, v26, v25
; CHECK-NEXT: vse1.v v25, (a1)
@@ -896,9 +892,8 @@ define void @fcmp_ord_vf_v4f16(<4 x half>* %x, half %y, <4 x i1>* %z) {
; CHECK-NEXT: vfmv.v.f v26, fa0
; CHECK-NEXT: vmfeq.vf v27, v26, fa0
; CHECK-NEXT: vmfeq.vv v26, v25, v25
-; CHECK-NEXT: vsetivli a0, 4, e8,mf2,ta,mu
-; CHECK-NEXT: vmand.mm v25, v26, v27
; CHECK-NEXT: vsetivli a0, 4, e8,m1,ta,mu
+; CHECK-NEXT: vmand.mm v25, v26, v27
; CHECK-NEXT: vse1.v v25, (a1)
; CHECK-NEXT: ret
%a = load <4 x half>, <4 x half>* %x
@@ -917,9 +912,8 @@ define void @fcmp_uno_vf_v4f16(<2 x half>* %x, half %y, <2 x i1>* %z) {
; CHECK-NEXT: vfmv.v.f v26, fa0
; CHECK-NEXT: vmfne.vf v27, v26, fa0
; CHECK-NEXT: vmfne.vv v26, v25, v25
-; CHECK-NEXT: vsetivli a0, 2, e8,mf2,ta,mu
-; CHECK-NEXT: vmor.mm v25, v26, v27
; CHECK-NEXT: vsetivli a0, 2, e8,m1,ta,mu
+; CHECK-NEXT: vmor.mm v25, v26, v27
; CHECK-NEXT: vse1.v v25, (a1)
; CHECK-NEXT: ret
%a = load <2 x half>, <2 x half>* %x
@@ -1212,9 +1206,8 @@ define void @fcmp_ult_fv_v8f64(<8 x double>* %x, double %y, <8 x i1>* %z) {
; CHECK-NEXT: vsetivli a2, 8, e64,m4,ta,mu
; CHECK-NEXT: vle64.v v28, (a0)
; CHECK-NEXT: vmfle.vf v25, v28, fa0
-; CHECK-NEXT: vsetivli a0, 8, e8,mf2,ta,mu
-; CHECK-NEXT: vmnand.mm v25, v25, v25
; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmnand.mm v25, v25, v25
; CHECK-NEXT: vse1.v v25, (a1)
; CHECK-NEXT: ret
%a = load <8 x double>, <8 x double>* %x
@@ -1285,8 +1278,8 @@ define void @fcmp_ueq_fv_v32f32(<32 x float>* %x, float %y, <32 x i1>* %z) {
; CHECK-NEXT: addi a2, zero, 32
; CHECK-NEXT: vsetvli a3, a2, e32,m8,ta,mu
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vmflt.vf v25, v8, fa0
-; CHECK-NEXT: vmfgt.vf v26, v8, fa0
+; CHECK-NEXT: vmfgt.vf v25, v8, fa0
+; CHECK-NEXT: vmflt.vf v26, v8, fa0
; CHECK-NEXT: vsetvli a0, a2, e8,m2,ta,mu
; CHECK-NEXT: vmnor.mm v25, v26, v25
; CHECK-NEXT: vse1.v v25, (a1)
@@ -1322,8 +1315,8 @@ define void @fcmp_one_fv_v8f64(<16 x double>* %x, double %y, <16 x i1>* %z) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli a2, 16, e64,m8,ta,mu
; CHECK-NEXT: vle64.v v8, (a0)
-; CHECK-NEXT: vmflt.vf v25, v8, fa0
-; CHECK-NEXT: vmfgt.vf v26, v8, fa0
+; CHECK-NEXT: vmfgt.vf v25, v8, fa0
+; CHECK-NEXT: vmflt.vf v26, v8, fa0
; CHECK-NEXT: vsetivli a0, 16, e8,m1,ta,mu
; CHECK-NEXT: vmor.mm v25, v26, v25
; CHECK-NEXT: vse1.v v25, (a1)
@@ -1359,11 +1352,10 @@ define void @fcmp_ord_fv_v4f16(<4 x half>* %x, half %y, <4 x i1>* %z) {
; CHECK-NEXT: vsetivli a2, 4, e16,m1,ta,mu
; CHECK-NEXT: vle16.v v25, (a0)
; CHECK-NEXT: vfmv.v.f v26, fa0
-; CHECK-NEXT: vmfeq.vf v27, v26, fa0
-; CHECK-NEXT: vmfeq.vv v26, v25, v25
-; CHECK-NEXT: vsetivli a0, 4, e8,mf2,ta,mu
-; CHECK-NEXT: vmand.mm v25, v27, v26
+; CHECK-NEXT: vmfeq.vv v27, v25, v25
+; CHECK-NEXT: vmfeq.vf v25, v26, fa0
; CHECK-NEXT: vsetivli a0, 4, e8,m1,ta,mu
+; CHECK-NEXT: vmand.mm v25, v25, v27
; CHECK-NEXT: vse1.v v25, (a1)
; CHECK-NEXT: ret
%a = load <4 x half>, <4 x half>* %x
@@ -1380,11 +1372,10 @@ define void @fcmp_uno_fv_v4f16(<2 x half>* %x, half %y, <2 x i1>* %z) {
; CHECK-NEXT: vsetivli a2, 2, e16,m1,ta,mu
; CHECK-NEXT: vle16.v v25, (a0)
; CHECK-NEXT: vfmv.v.f v26, fa0
-; CHECK-NEXT: vmfne.vf v27, v26, fa0
-; CHECK-NEXT: vmfne.vv v26, v25, v25
-; CHECK-NEXT: vsetivli a0, 2, e8,mf2,ta,mu
-; CHECK-NEXT: vmor.mm v25, v27, v26
+; CHECK-NEXT: vmfne.vv v27, v25, v25
+; CHECK-NEXT: vmfne.vf v25, v26, fa0
; CHECK-NEXT: vsetivli a0, 2, e8,m1,ta,mu
+; CHECK-NEXT: vmor.mm v25, v25, v27
; CHECK-NEXT: vse1.v v25, (a1)
; CHECK-NEXT: ret
%a = load <2 x half>, <2 x half>* %x