[llvm] 427b644 - Revert "[LLVM][ISel][AArch64 Remove AArch64ISD::FCM##z nodes. (#135817)"
Paul Walker via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 24 02:54:04 PDT 2025
Author: Paul Walker
Date: 2025-04-24T09:48:54Z
New Revision: 427b6448a3af009e57c0142d6d8af83318b45093
URL: https://github.com/llvm/llvm-project/commit/427b6448a3af009e57c0142d6d8af83318b45093
DIFF: https://github.com/llvm/llvm-project/commit/427b6448a3af009e57c0142d6d8af83318b45093.diff
LOG: Revert "[LLVM][ISel][AArch64 Remove AArch64ISD::FCM##z nodes. (#135817)"
This reverts commit 15d8b3cae9debc2bd7d27ca92ff599ba9fb30da5.
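For context, the reverted change had replaced the dedicated one-operand zero-compare nodes (FCMEQz, FCMGEz, FCMGTz, FCMLEz, FCMLTz) with two-operand FCM* compares matched against immAllZerosV PatFrags; this revert restores the dedicated nodes in both SelectionDAG and GlobalISel. Below is a minimal, hypothetical LLVM IR sketch (not taken from the patch; the function name is illustrative) of the shape that exercises this path: per the restored EmitVectorComparison code in the diff, an all-zero RHS splat is detected and the single-operand AArch64ISD::FCMGTz node is emitted instead of FCMGT with an explicit zero vector.

; Hypothetical example, not part of the patch: a vector ordered-greater-than
; compare against a zero splat, which the restored zero-compare lowering handles.
define <4 x i32> @fcmp_gt_zero(<4 x float> %a) {
  %cmp = fcmp ogt <4 x float> %a, zeroinitializer
  %res = sext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %res
}

Whether the final selection uses the #0.0 immediate form or a movi plus register compare depends on the surrounding lowering, as the test updates in this diff show.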
Added:
Modified:
llvm/lib/CodeGen/GlobalISel/Utils.cpp
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/lib/Target/AArch64/AArch64ISelLowering.h
llvm/lib/Target/AArch64/AArch64InstrFormats.td
llvm/lib/Target/AArch64/AArch64InstrGISel.td
llvm/lib/Target/AArch64/AArch64InstrInfo.td
llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
llvm/test/CodeGen/AArch64/GlobalISel/lower-neon-vector-fcmp.mir
llvm/test/CodeGen/AArch64/GlobalISel/select-neon-vector-fcmp.mir
llvm/test/CodeGen/AArch64/arm64-zip.ll
llvm/test/CodeGen/AArch64/select_cc.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index d8cc86b34a819..223d69c362185 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -1385,8 +1385,7 @@ bool llvm::isBuildVectorConstantSplat(const Register Reg,
const MachineRegisterInfo &MRI,
int64_t SplatValue, bool AllowUndef) {
if (auto SplatValAndReg = getAnyConstantSplat(Reg, MRI, AllowUndef))
- return SplatValAndReg->Value.getSExtValue() == SplatValue;
-
+ return mi_match(SplatValAndReg->VReg, MRI, m_SpecificICst(SplatValue));
return false;
}
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 713f814121aa3..ee4cc51f8d4ff 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -2602,6 +2602,11 @@ unsigned AArch64TargetLowering::ComputeNumSignBitsForTargetNode(
case AArch64ISD::FCMEQ:
case AArch64ISD::FCMGE:
case AArch64ISD::FCMGT:
+ case AArch64ISD::FCMEQz:
+ case AArch64ISD::FCMGEz:
+ case AArch64ISD::FCMGTz:
+ case AArch64ISD::FCMLEz:
+ case AArch64ISD::FCMLTz:
// Compares return either 0 or all-ones
return VTBits;
case AArch64ISD::VASHR: {
@@ -2818,6 +2823,11 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(AArch64ISD::FCMEQ)
MAKE_CASE(AArch64ISD::FCMGE)
MAKE_CASE(AArch64ISD::FCMGT)
+ MAKE_CASE(AArch64ISD::FCMEQz)
+ MAKE_CASE(AArch64ISD::FCMGEz)
+ MAKE_CASE(AArch64ISD::FCMGTz)
+ MAKE_CASE(AArch64ISD::FCMLEz)
+ MAKE_CASE(AArch64ISD::FCMLTz)
MAKE_CASE(AArch64ISD::SADDV)
MAKE_CASE(AArch64ISD::UADDV)
MAKE_CASE(AArch64ISD::UADDLV)
@@ -15830,19 +15840,40 @@ static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS,
assert(VT.getSizeInBits() == SrcVT.getSizeInBits() &&
"function only supposed to emit natural comparisons");
+ APInt SplatValue;
+ APInt SplatUndef;
+ unsigned SplatBitSize = 0;
+ bool HasAnyUndefs;
+
+ BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
+ bool IsCnst = BVN && BVN->isConstantSplat(SplatValue, SplatUndef,
+ SplatBitSize, HasAnyUndefs);
+
+ bool IsZero = IsCnst && SplatValue == 0;
+
if (SrcVT.getVectorElementType().isFloatingPoint()) {
switch (CC) {
default:
return SDValue();
case AArch64CC::NE: {
- SDValue Fcmeq = DAG.getNode(AArch64ISD::FCMEQ, dl, VT, LHS, RHS);
+ SDValue Fcmeq;
+ if (IsZero)
+ Fcmeq = DAG.getNode(AArch64ISD::FCMEQz, dl, VT, LHS);
+ else
+ Fcmeq = DAG.getNode(AArch64ISD::FCMEQ, dl, VT, LHS, RHS);
return DAG.getNOT(dl, Fcmeq, VT);
}
case AArch64CC::EQ:
+ if (IsZero)
+ return DAG.getNode(AArch64ISD::FCMEQz, dl, VT, LHS);
return DAG.getNode(AArch64ISD::FCMEQ, dl, VT, LHS, RHS);
case AArch64CC::GE:
+ if (IsZero)
+ return DAG.getNode(AArch64ISD::FCMGEz, dl, VT, LHS);
return DAG.getNode(AArch64ISD::FCMGE, dl, VT, LHS, RHS);
case AArch64CC::GT:
+ if (IsZero)
+ return DAG.getNode(AArch64ISD::FCMGTz, dl, VT, LHS);
return DAG.getNode(AArch64ISD::FCMGT, dl, VT, LHS, RHS);
case AArch64CC::LE:
if (!NoNans)
@@ -15850,6 +15881,8 @@ static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS,
// If we ignore NaNs then we can use to the LS implementation.
[[fallthrough]];
case AArch64CC::LS:
+ if (IsZero)
+ return DAG.getNode(AArch64ISD::FCMLEz, dl, VT, LHS);
return DAG.getNode(AArch64ISD::FCMGE, dl, VT, RHS, LHS);
case AArch64CC::LT:
if (!NoNans)
@@ -15857,6 +15890,8 @@ static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS,
// If we ignore NaNs then we can use to the MI implementation.
[[fallthrough]];
case AArch64CC::MI:
+ if (IsZero)
+ return DAG.getNode(AArch64ISD::FCMLTz, dl, VT, LHS);
return DAG.getNode(AArch64ISD::FCMGT, dl, VT, RHS, LHS);
}
}
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index d9b535b910b80..8b5d2ec9e6ddf 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -245,6 +245,13 @@ enum NodeType : unsigned {
FCMGE,
FCMGT,
+ // Vector zero comparisons
+ FCMEQz,
+ FCMGEz,
+ FCMGTz,
+ FCMLEz,
+ FCMLTz,
+
// Round wide FP to narrow FP with inexact results to odd.
FCVTXN,
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 2a0da9a1373ee..9bbcb6f3aedf5 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -7136,7 +7136,7 @@ multiclass SIMDCmpTwoVector<bit U, bits<5> opc, string asm,
// FP Comparisons support only S and D element sizes (and H for v8.2a).
multiclass SIMDFPCmpTwoVector<bit U, bit S, bits<5> opc,
- string asm, SDPatternOperator OpNode> {
+ string asm, SDNode OpNode> {
let mayRaiseFPException = 1, Uses = [FPCR] in {
let Predicates = [HasNEON, HasFullFP16] in {
diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
index 7322212c5bb24..a99019d72b4ac 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
@@ -179,6 +179,36 @@ def G_FCMGT : AArch64GenericInstruction {
let hasSideEffects = 0;
}
+def G_FCMEQZ : AArch64GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src);
+ let hasSideEffects = 0;
+}
+
+def G_FCMGEZ : AArch64GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src);
+ let hasSideEffects = 0;
+}
+
+def G_FCMGTZ : AArch64GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src);
+ let hasSideEffects = 0;
+}
+
+def G_FCMLEZ : AArch64GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src);
+ let hasSideEffects = 0;
+}
+
+def G_FCMLTZ : AArch64GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src);
+ let hasSideEffects = 0;
+}
+
def G_AARCH64_PREFETCH : AArch64GenericInstruction {
let OutOperandList = (outs);
let InOperandList = (ins type0:$imm, ptype0:$src1);
@@ -265,6 +295,12 @@ def : GINodeEquiv<G_FCMEQ, AArch64fcmeq>;
def : GINodeEquiv<G_FCMGE, AArch64fcmge>;
def : GINodeEquiv<G_FCMGT, AArch64fcmgt>;
+def : GINodeEquiv<G_FCMEQZ, AArch64fcmeqz>;
+def : GINodeEquiv<G_FCMGEZ, AArch64fcmgez>;
+def : GINodeEquiv<G_FCMGTZ, AArch64fcmgtz>;
+def : GINodeEquiv<G_FCMLEZ, AArch64fcmlez>;
+def : GINodeEquiv<G_FCMLTZ, AArch64fcmltz>;
+
def : GINodeEquiv<G_BSP, AArch64bsp>;
def : GINodeEquiv<G_UMULL, AArch64umull>;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 7f7e1d20ae604..4657a77e80ecc 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -882,20 +882,11 @@ def AArch64cmltz : PatFrag<(ops node:$lhs),
def AArch64cmtst : PatFrag<(ops node:$LHS, node:$RHS),
(vnot (AArch64cmeqz (and node:$LHS, node:$RHS)))>;
-def AArch64fcmeqz : PatFrag<(ops node:$lhs),
- (AArch64fcmeq node:$lhs, immAllZerosV)>;
-
-def AArch64fcmgez : PatFrag<(ops node:$lhs),
- (AArch64fcmge node:$lhs, immAllZerosV)>;
-
-def AArch64fcmgtz : PatFrag<(ops node:$lhs),
- (AArch64fcmgt node:$lhs, immAllZerosV)>;
-
-def AArch64fcmlez : PatFrag<(ops node:$lhs),
- (AArch64fcmge immAllZerosV, node:$lhs)>;
-
-def AArch64fcmltz : PatFrag<(ops node:$lhs),
- (AArch64fcmgt immAllZerosV, node:$lhs)>;
+def AArch64fcmeqz: SDNode<"AArch64ISD::FCMEQz", SDT_AArch64fcmpz>;
+def AArch64fcmgez: SDNode<"AArch64ISD::FCMGEz", SDT_AArch64fcmpz>;
+def AArch64fcmgtz: SDNode<"AArch64ISD::FCMGTz", SDT_AArch64fcmpz>;
+def AArch64fcmlez: SDNode<"AArch64ISD::FCMLEz", SDT_AArch64fcmpz>;
+def AArch64fcmltz: SDNode<"AArch64ISD::FCMLTz", SDT_AArch64fcmpz>;
def AArch64fcvtxn_n: SDNode<"AArch64ISD::FCVTXN", SDTFPRoundOp>;
def AArch64fcvtxnsdr: PatFrags<(ops node:$Rn),
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
index 81ee525ed0501..4785c7b68d94d 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
@@ -808,14 +808,16 @@ void applyScalarizeVectorUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
bool matchBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI) {
assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
-
+ auto Splat = getAArch64VectorSplat(MI, MRI);
+ if (!Splat)
+ return false;
+ if (Splat->isReg())
+ return true;
// Later, during selection, we'll try to match imported patterns using
// immAllOnesV and immAllZerosV. These require G_BUILD_VECTOR. Don't lower
// G_BUILD_VECTORs which could match those patterns.
- if (isBuildVectorAllZeros(MI, MRI) || isBuildVectorAllOnes(MI, MRI))
- return false;
-
- return getAArch64VectorSplat(MI, MRI).has_value();
+ int64_t Cst = Splat->getCst();
+ return (Cst != 0 && Cst != -1);
}
void applyBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI,
@@ -931,10 +933,11 @@ void applySwapICmpOperands(MachineInstr &MI, GISelChangeObserver &Observer) {
/// \returns a function which builds a vector floating point compare instruction
/// for a condition code \p CC.
+/// \param [in] IsZero - True if the comparison is against 0.
/// \param [in] NoNans - True if the target has NoNansFPMath.
std::function<Register(MachineIRBuilder &)>
-getVectorFCMP(AArch64CC::CondCode CC, Register LHS, Register RHS, bool NoNans,
- MachineRegisterInfo &MRI) {
+getVectorFCMP(AArch64CC::CondCode CC, Register LHS, Register RHS, bool IsZero,
+ bool NoNans, MachineRegisterInfo &MRI) {
LLT DstTy = MRI.getType(LHS);
assert(DstTy.isVector() && "Expected vector types only?");
assert(DstTy == MRI.getType(RHS) && "Src and Dst types must match!");
@@ -942,29 +945,46 @@ getVectorFCMP(AArch64CC::CondCode CC, Register LHS, Register RHS, bool NoNans,
default:
llvm_unreachable("Unexpected condition code!");
case AArch64CC::NE:
- return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
- auto FCmp = MIB.buildInstr(AArch64::G_FCMEQ, {DstTy}, {LHS, RHS});
+ return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
+ auto FCmp = IsZero
+ ? MIB.buildInstr(AArch64::G_FCMEQZ, {DstTy}, {LHS})
+ : MIB.buildInstr(AArch64::G_FCMEQ, {DstTy}, {LHS, RHS});
return MIB.buildNot(DstTy, FCmp).getReg(0);
};
case AArch64CC::EQ:
- return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
- return MIB.buildInstr(AArch64::G_FCMEQ, {DstTy}, {LHS, RHS}).getReg(0);
+ return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
+ return IsZero
+ ? MIB.buildInstr(AArch64::G_FCMEQZ, {DstTy}, {LHS}).getReg(0)
+ : MIB.buildInstr(AArch64::G_FCMEQ, {DstTy}, {LHS, RHS})
+ .getReg(0);
};
case AArch64CC::GE:
- return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
- return MIB.buildInstr(AArch64::G_FCMGE, {DstTy}, {LHS, RHS}).getReg(0);
+ return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
+ return IsZero
+ ? MIB.buildInstr(AArch64::G_FCMGEZ, {DstTy}, {LHS}).getReg(0)
+ : MIB.buildInstr(AArch64::G_FCMGE, {DstTy}, {LHS, RHS})
+ .getReg(0);
};
case AArch64CC::GT:
- return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
- return MIB.buildInstr(AArch64::G_FCMGT, {DstTy}, {LHS, RHS}).getReg(0);
+ return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
+ return IsZero
+ ? MIB.buildInstr(AArch64::G_FCMGTZ, {DstTy}, {LHS}).getReg(0)
+ : MIB.buildInstr(AArch64::G_FCMGT, {DstTy}, {LHS, RHS})
+ .getReg(0);
};
case AArch64CC::LS:
- return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
- return MIB.buildInstr(AArch64::G_FCMGE, {DstTy}, {RHS, LHS}).getReg(0);
+ return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
+ return IsZero
+ ? MIB.buildInstr(AArch64::G_FCMLEZ, {DstTy}, {LHS}).getReg(0)
+ : MIB.buildInstr(AArch64::G_FCMGE, {DstTy}, {RHS, LHS})
+ .getReg(0);
};
case AArch64CC::MI:
- return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
- return MIB.buildInstr(AArch64::G_FCMGT, {DstTy}, {RHS, LHS}).getReg(0);
+ return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
+ return IsZero
+ ? MIB.buildInstr(AArch64::G_FCMLTZ, {DstTy}, {LHS}).getReg(0)
+ : MIB.buildInstr(AArch64::G_FCMGT, {DstTy}, {RHS, LHS})
+ .getReg(0);
};
}
}
@@ -1004,17 +1024,23 @@ void applyLowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
LLT DstTy = MRI.getType(Dst);
+ auto Splat = getAArch64VectorSplat(*MRI.getVRegDef(RHS), MRI);
+
+ // Compares against 0 have special target-specific pseudos.
+ bool IsZero = Splat && Splat->isCst() && Splat->getCst() == 0;
+
bool Invert = false;
AArch64CC::CondCode CC, CC2 = AArch64CC::AL;
if ((Pred == CmpInst::Predicate::FCMP_ORD ||
Pred == CmpInst::Predicate::FCMP_UNO) &&
- isBuildVectorAllZeros(*MRI.getVRegDef(RHS), MRI)) {
+ IsZero) {
// The special case "fcmp ord %a, 0" is the canonical check that LHS isn't
// NaN, so equivalent to a == a and doesn't need the two comparisons an
// "ord" normally would.
// Similarly, "fcmp uno %a, 0" is the canonical check that LHS is NaN and is
// thus equivalent to a != a.
RHS = LHS;
+ IsZero = false;
CC = Pred == CmpInst::Predicate::FCMP_ORD ? AArch64CC::EQ : AArch64CC::NE;
} else
changeVectorFCMPPredToAArch64CC(Pred, CC, CC2, Invert);
@@ -1025,12 +1051,12 @@ void applyLowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
const bool NoNans =
ST.getTargetLowering()->getTargetMachine().Options.NoNaNsFPMath;
- auto Cmp = getVectorFCMP(CC, LHS, RHS, NoNans, MRI);
+ auto Cmp = getVectorFCMP(CC, LHS, RHS, IsZero, NoNans, MRI);
Register CmpRes;
if (CC2 == AArch64CC::AL)
CmpRes = Cmp(MIB);
else {
- auto Cmp2 = getVectorFCMP(CC2, LHS, RHS, NoNans, MRI);
+ auto Cmp2 = getVectorFCMP(CC2, LHS, RHS, IsZero, NoNans, MRI);
auto Cmp2Dst = Cmp2(MIB);
auto Cmp1Dst = Cmp(MIB);
CmpRes = MIB.buildOr(DstTy, Cmp1Dst, Cmp2Dst).getReg(0);
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/lower-neon-vector-fcmp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/lower-neon-vector-fcmp.mir
index 591dafc1ad098..1f5fb892df582 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/lower-neon-vector-fcmp.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/lower-neon-vector-fcmp.mir
@@ -37,10 +37,8 @@ body: |
; CHECK: liveins: $q0, $q1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %lhs:_(<2 x s64>) = COPY $q0
- ; CHECK-NEXT: %zero:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: %zero_vec:_(<2 x s64>) = G_BUILD_VECTOR %zero(s64), %zero(s64)
- ; CHECK-NEXT: [[FCMEQ:%[0-9]+]]:_(<2 x s64>) = G_FCMEQ %lhs, %zero_vec(<2 x s64>)
- ; CHECK-NEXT: $q0 = COPY [[FCMEQ]](<2 x s64>)
+ ; CHECK-NEXT: [[FCMEQZ:%[0-9]+]]:_(<2 x s64>) = G_FCMEQZ %lhs
+ ; CHECK-NEXT: $q0 = COPY [[FCMEQZ]](<2 x s64>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%lhs:_(<2 x s64>) = COPY $q0
%zero:_(s64) = G_CONSTANT i64 0
@@ -84,10 +82,8 @@ body: |
; CHECK: liveins: $q0, $q1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %lhs:_(<2 x s64>) = COPY $q0
- ; CHECK-NEXT: %zero:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: %zero_vec:_(<2 x s64>) = G_BUILD_VECTOR %zero(s64), %zero(s64)
- ; CHECK-NEXT: [[FCMGT:%[0-9]+]]:_(<2 x s64>) = G_FCMGT %lhs, %zero_vec(<2 x s64>)
- ; CHECK-NEXT: $q0 = COPY [[FCMGT]](<2 x s64>)
+ ; CHECK-NEXT: [[FCMGTZ:%[0-9]+]]:_(<2 x s64>) = G_FCMGTZ %lhs
+ ; CHECK-NEXT: $q0 = COPY [[FCMGTZ]](<2 x s64>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%lhs:_(<2 x s64>) = COPY $q0
%zero:_(s64) = G_CONSTANT i64 0
@@ -133,10 +129,8 @@ body: |
; CHECK: liveins: $q0, $q1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %lhs:_(<2 x s64>) = COPY $q0
- ; CHECK-NEXT: %zero:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: %zero_vec:_(<2 x s64>) = G_BUILD_VECTOR %zero(s64), %zero(s64)
- ; CHECK-NEXT: [[FCMGE:%[0-9]+]]:_(<2 x s64>) = G_FCMGE %lhs, %zero_vec(<2 x s64>)
- ; CHECK-NEXT: $q0 = COPY [[FCMGE]](<2 x s64>)
+ ; CHECK-NEXT: [[FCMGEZ:%[0-9]+]]:_(<2 x s64>) = G_FCMGEZ %lhs
+ ; CHECK-NEXT: $q0 = COPY [[FCMGEZ]](<2 x s64>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%lhs:_(<2 x s64>) = COPY $q0
%zero:_(s64) = G_CONSTANT i64 0
@@ -180,10 +174,8 @@ body: |
; CHECK: liveins: $q0, $q1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %lhs:_(<2 x s64>) = COPY $q0
- ; CHECK-NEXT: %zero:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: %zero_vec:_(<2 x s64>) = G_BUILD_VECTOR %zero(s64), %zero(s64)
- ; CHECK-NEXT: [[FCMGT:%[0-9]+]]:_(<2 x s64>) = G_FCMGT %zero_vec, %lhs(<2 x s64>)
- ; CHECK-NEXT: $q0 = COPY [[FCMGT]](<2 x s64>)
+ ; CHECK-NEXT: [[FCMLTZ:%[0-9]+]]:_(<2 x s64>) = G_FCMLTZ %lhs
+ ; CHECK-NEXT: $q0 = COPY [[FCMLTZ]](<2 x s64>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%lhs:_(<2 x s64>) = COPY $q0
%zero:_(s64) = G_CONSTANT i64 0
@@ -226,10 +218,8 @@ body: |
; CHECK: liveins: $q0, $q1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %lhs:_(<2 x s64>) = COPY $q0
- ; CHECK-NEXT: %zero:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: %zero_vec:_(<2 x s64>) = G_BUILD_VECTOR %zero(s64), %zero(s64)
- ; CHECK-NEXT: [[FCMGE:%[0-9]+]]:_(<2 x s64>) = G_FCMGE %zero_vec, %lhs(<2 x s64>)
- ; CHECK-NEXT: $q0 = COPY [[FCMGE]](<2 x s64>)
+ ; CHECK-NEXT: [[FCMLEZ:%[0-9]+]]:_(<2 x s64>) = G_FCMLEZ %lhs
+ ; CHECK-NEXT: $q0 = COPY [[FCMLEZ]](<2 x s64>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%lhs:_(<2 x s64>) = COPY $q0
%zero:_(s64) = G_CONSTANT i64 0
@@ -280,11 +270,9 @@ body: |
; CHECK: liveins: $q0, $q1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %lhs:_(<2 x s64>) = COPY $q0
- ; CHECK-NEXT: %zero:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: %zero_vec:_(<2 x s64>) = G_BUILD_VECTOR %zero(s64), %zero(s64)
- ; CHECK-NEXT: [[FCMGT:%[0-9]+]]:_(<2 x s64>) = G_FCMGT %lhs, %zero_vec(<2 x s64>)
- ; CHECK-NEXT: [[FCMGT1:%[0-9]+]]:_(<2 x s64>) = G_FCMGT %zero_vec, %lhs(<2 x s64>)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<2 x s64>) = G_OR [[FCMGT1]], [[FCMGT]]
+ ; CHECK-NEXT: [[FCMGTZ:%[0-9]+]]:_(<2 x s64>) = G_FCMGTZ %lhs
+ ; CHECK-NEXT: [[FCMLTZ:%[0-9]+]]:_(<2 x s64>) = G_FCMLTZ %lhs
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<2 x s64>) = G_OR [[FCMLTZ]], [[FCMGTZ]]
; CHECK-NEXT: $q0 = COPY [[OR]](<2 x s64>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%lhs:_(<2 x s64>) = COPY $q0
@@ -446,12 +434,10 @@ body: |
; CHECK: liveins: $q0, $q1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %lhs:_(<2 x s64>) = COPY $q0
- ; CHECK-NEXT: %zero:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: %zero_vec:_(<2 x s64>) = G_BUILD_VECTOR %zero(s64), %zero(s64)
- ; CHECK-NEXT: [[FCMGE:%[0-9]+]]:_(<2 x s64>) = G_FCMGE %lhs, %zero_vec(<2 x s64>)
+ ; CHECK-NEXT: [[FCMGEZ:%[0-9]+]]:_(<2 x s64>) = G_FCMGEZ %lhs
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64)
- ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<2 x s64>) = G_XOR [[FCMGE]], [[BUILD_VECTOR]]
+ ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<2 x s64>) = G_XOR [[FCMGEZ]], [[BUILD_VECTOR]]
; CHECK-NEXT: $q0 = COPY [[XOR]](<2 x s64>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%lhs:_(<2 x s64>) = COPY $q0
@@ -504,12 +490,10 @@ body: |
; CHECK: liveins: $q0, $q1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %lhs:_(<2 x s64>) = COPY $q0
- ; CHECK-NEXT: %zero:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: %zero_vec:_(<2 x s64>) = G_BUILD_VECTOR %zero(s64), %zero(s64)
- ; CHECK-NEXT: [[FCMGT:%[0-9]+]]:_(<2 x s64>) = G_FCMGT %lhs, %zero_vec(<2 x s64>)
+ ; CHECK-NEXT: [[FCMGTZ:%[0-9]+]]:_(<2 x s64>) = G_FCMGTZ %lhs
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64)
- ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<2 x s64>) = G_XOR [[FCMGT]], [[BUILD_VECTOR]]
+ ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<2 x s64>) = G_XOR [[FCMGTZ]], [[BUILD_VECTOR]]
; CHECK-NEXT: $q0 = COPY [[XOR]](<2 x s64>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%lhs:_(<2 x s64>) = COPY $q0
@@ -562,12 +546,10 @@ body: |
; CHECK: liveins: $q0, $q1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %lhs:_(<2 x s64>) = COPY $q0
- ; CHECK-NEXT: %zero:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: %zero_vec:_(<2 x s64>) = G_BUILD_VECTOR %zero(s64), %zero(s64)
- ; CHECK-NEXT: [[FCMGE:%[0-9]+]]:_(<2 x s64>) = G_FCMGE %zero_vec, %lhs(<2 x s64>)
+ ; CHECK-NEXT: [[FCMLEZ:%[0-9]+]]:_(<2 x s64>) = G_FCMLEZ %lhs
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64)
- ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<2 x s64>) = G_XOR [[FCMGE]], [[BUILD_VECTOR]]
+ ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<2 x s64>) = G_XOR [[FCMLEZ]], [[BUILD_VECTOR]]
; CHECK-NEXT: $q0 = COPY [[XOR]](<2 x s64>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%lhs:_(<2 x s64>) = COPY $q0
@@ -620,12 +602,10 @@ body: |
; CHECK: liveins: $q0, $q1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %lhs:_(<2 x s64>) = COPY $q0
- ; CHECK-NEXT: %zero:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: %zero_vec:_(<2 x s64>) = G_BUILD_VECTOR %zero(s64), %zero(s64)
- ; CHECK-NEXT: [[FCMGT:%[0-9]+]]:_(<2 x s64>) = G_FCMGT %zero_vec, %lhs(<2 x s64>)
+ ; CHECK-NEXT: [[FCMLTZ:%[0-9]+]]:_(<2 x s64>) = G_FCMLTZ %lhs
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64)
- ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<2 x s64>) = G_XOR [[FCMGT]], [[BUILD_VECTOR]]
+ ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<2 x s64>) = G_XOR [[FCMLTZ]], [[BUILD_VECTOR]]
; CHECK-NEXT: $q0 = COPY [[XOR]](<2 x s64>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%lhs:_(<2 x s64>) = COPY $q0
@@ -678,12 +658,10 @@ body: |
; CHECK: liveins: $q0, $q1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %lhs:_(<2 x s64>) = COPY $q0
- ; CHECK-NEXT: %zero:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: %zero_vec:_(<2 x s64>) = G_BUILD_VECTOR %zero(s64), %zero(s64)
- ; CHECK-NEXT: [[FCMEQ:%[0-9]+]]:_(<2 x s64>) = G_FCMEQ %lhs, %zero_vec(<2 x s64>)
+ ; CHECK-NEXT: [[FCMEQZ:%[0-9]+]]:_(<2 x s64>) = G_FCMEQZ %lhs
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64)
- ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<2 x s64>) = G_XOR [[FCMEQ]], [[BUILD_VECTOR]]
+ ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<2 x s64>) = G_XOR [[FCMEQZ]], [[BUILD_VECTOR]]
; CHECK-NEXT: $q0 = COPY [[XOR]](<2 x s64>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%lhs:_(<2 x s64>) = COPY $q0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-neon-vector-fcmp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-neon-vector-fcmp.mir
index daf84b5cf07e9..0b0c3ed763abc 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-neon-vector-fcmp.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-neon-vector-fcmp.mir
@@ -77,7 +77,7 @@ body: |
%lhs:fpr(<2 x s64>) = COPY $q0
%zero:gpr(s64) = G_CONSTANT i64 0
%zero_vec:fpr(<2 x s64>) = G_BUILD_VECTOR %zero(s64), %zero(s64)
- %fcmp:fpr(<2 x s64>) = G_FCMEQ %lhs, %zero_vec(<2 x s64>)
+ %fcmp:fpr(<2 x s64>) = G_FCMEQZ %lhs
$q0 = COPY %fcmp(<2 x s64>)
RET_ReallyLR implicit $q0
@@ -97,7 +97,7 @@ body: |
%lhs:fpr(<2 x s64>) = COPY $q0
%zero:gpr(s64) = G_CONSTANT i64 0
%zero_vec:fpr(<2 x s64>) = G_BUILD_VECTOR %zero(s64), %zero(s64)
- %fcmp:fpr(<2 x s64>) = G_FCMGE %lhs, %zero_vec(<2 x s64>)
+ %fcmp:fpr(<2 x s64>) = G_FCMGEZ %lhs
$q0 = COPY %fcmp(<2 x s64>)
RET_ReallyLR implicit $q0
@@ -117,7 +117,7 @@ body: |
%lhs:fpr(<2 x s64>) = COPY $q0
%zero:gpr(s64) = G_CONSTANT i64 0
%zero_vec:fpr(<2 x s64>) = G_BUILD_VECTOR %zero(s64), %zero(s64)
- %fcmp:fpr(<2 x s64>) = G_FCMGT %lhs, %zero_vec(<2 x s64>)
+ %fcmp:fpr(<2 x s64>) = G_FCMGTZ %lhs
$q0 = COPY %fcmp(<2 x s64>)
RET_ReallyLR implicit $q0
@@ -137,7 +137,7 @@ body: |
%lhs:fpr(<2 x s64>) = COPY $q0
%zero:gpr(s64) = G_CONSTANT i64 0
%zero_vec:fpr(<2 x s64>) = G_BUILD_VECTOR %zero(s64), %zero(s64)
- %fcmp:fpr(<2 x s64>) = G_FCMGE %zero_vec(<2 x s64>), %lhs
+ %fcmp:fpr(<2 x s64>) = G_FCMLEZ %lhs
$q0 = COPY %fcmp(<2 x s64>)
RET_ReallyLR implicit $q0
@@ -157,6 +157,6 @@ body: |
%lhs:fpr(<2 x s64>) = COPY $q0
%zero:gpr(s64) = G_CONSTANT i64 0
%zero_vec:fpr(<2 x s64>) = G_BUILD_VECTOR %zero(s64), %zero(s64)
- %fcmp:fpr(<2 x s64>) = G_FCMGT %zero_vec(<2 x s64>), %lhs
+ %fcmp:fpr(<2 x s64>) = G_FCMLTZ %lhs
$q0 = COPY %fcmp(<2 x s64>)
RET_ReallyLR implicit $q0
diff --git a/llvm/test/CodeGen/AArch64/arm64-zip.ll b/llvm/test/CodeGen/AArch64/arm64-zip.ll
index b24e54a68fb42..9955b253f563e 100644
--- a/llvm/test/CodeGen/AArch64/arm64-zip.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-zip.ll
@@ -405,7 +405,8 @@ define <16 x i8> @combine_v8i16_8firstundef(<8 x i8> %0, <8 x i8> %1) {
define <4 x float> @shuffle_zip1(<4 x float> %arg) {
; CHECK-LABEL: shuffle_zip1:
; CHECK: // %bb.0: // %bb
-; CHECK-NEXT: fcmgt.4s v0, v0, #0.0
+; CHECK-NEXT: movi.2d v1, #0000000000000000
+; CHECK-NEXT: fcmgt.4s v0, v0, v1
; CHECK-NEXT: uzp1.8h v1, v0, v0
; CHECK-NEXT: xtn.4h v0, v0
; CHECK-NEXT: xtn.4h v1, v1
diff --git a/llvm/test/CodeGen/AArch64/select_cc.ll b/llvm/test/CodeGen/AArch64/select_cc.ll
index 66dbd4ed5a4dc..6feaabe85d9ba 100644
--- a/llvm/test/CodeGen/AArch64/select_cc.ll
+++ b/llvm/test/CodeGen/AArch64/select_cc.ll
@@ -85,8 +85,9 @@ entry:
define <2 x double> @select_olt_load_cmp(<2 x double> %a, ptr %src) {
; CHECK-SD-LABEL: select_olt_load_cmp:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: ldr d1, [x0]
-; CHECK-SD-NEXT: fcmgt v1.2s, v1.2s, #0.0
+; CHECK-SD-NEXT: movi d1, #0000000000000000
+; CHECK-SD-NEXT: ldr d2, [x0]
+; CHECK-SD-NEXT: fcmgt v1.2s, v2.2s, v1.2s
; CHECK-SD-NEXT: sshll v1.2d, v1.2s, #0
; CHECK-SD-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-SD-NEXT: ret