[llvm] a1ce88b - [AArch64][SVE] Implement AArch64ISD::SETCC_PRED
Eli Friedman via llvm-commits
llvm-commits at lists.llvm.org
Fri May 15 11:53:46 PDT 2020
Author: Eli Friedman
Date: 2020-05-15T11:53:21-07:00
New Revision: a1ce88b4e32ed64935a0ce8df25819a51f06b7dd
URL: https://github.com/llvm/llvm-project/commit/a1ce88b4e32ed64935a0ce8df25819a51f06b7dd
DIFF: https://github.com/llvm/llvm-project/commit/a1ce88b4e32ed64935a0ce8df25819a51f06b7dd.diff
LOG: [AArch64][SVE] Implement AArch64ISD::SETCC_PRED
This unifies the lowering of SETCC operations on scalable vectors along the lines of the other predicated SVE operations (SHL_PRED, SRL_PRED, SRA_PRED, etc.).
Differential Revision: https://reviews.llvm.org/D79975
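
As a sketch of what the unified lowering enables (adapted from the new tests added below; the function name here is illustrative), a plain IR integer comparison on scalable vectors now goes through AArch64ISD::SETCC_PRED and selects to a single predicated SVE compare:

    define <vscale x 4 x i1> @cmpgt_example(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
      ; Expected codegen with this patch (see the cmpgt_ir_s test below):
      ;   ptrue p0.s
      ;   cmpgt p0.s, p0/z, z0.s, z1.s
      ;   ret
      %out = icmp sgt <vscale x 4 x i32> %a, %b
      ret <vscale x 4 x i1> %out
    }

The sve.cmp* intrinsics and the wide-compare intrinsics are likewise rewritten onto the same SETCC_PRED node, so a single set of patterns covers all three forms.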
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/lib/Target/AArch64/AArch64ISelLowering.h
llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
llvm/lib/Target/AArch64/SVEInstrFormats.td
llvm/test/CodeGen/AArch64/sve-intrinsics-int-compares.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index fbd9ba27a9d2..8abb5eb5d720 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -886,6 +886,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SHL, VT, Custom);
setOperationAction(ISD::SRL, VT, Custom);
setOperationAction(ISD::SRA, VT, Custom);
+ if (VT.getScalarType() == MVT::i1)
+ setOperationAction(ISD::SETCC, VT, Custom);
}
}
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
@@ -1294,6 +1296,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
case AArch64ISD::SHL_PRED: return "AArch64ISD::SHL_PRED";
case AArch64ISD::SRL_PRED: return "AArch64ISD::SRL_PRED";
case AArch64ISD::SRA_PRED: return "AArch64ISD::SRA_PRED";
+ case AArch64ISD::SETCC_PRED: return "AArch64ISD::SETCC_PRED";
case AArch64ISD::ADC: return "AArch64ISD::ADC";
case AArch64ISD::SBC: return "AArch64ISD::SBC";
case AArch64ISD::ADDS: return "AArch64ISD::ADDS";
@@ -7719,7 +7722,9 @@ SDValue AArch64TargetLowering::LowerToPredicatedOp(SDValue Op,
VT.getVectorNumElements(), true);
SDValue Mask = getPTrue(DAG, DL, PredTy, AArch64SVEPredPattern::all);
- return DAG.getNode(NewOp, DL, VT, Mask, Op.getOperand(0), Op.getOperand(1));
+ SmallVector<SDValue, 4> Operands = {Mask};
+ Operands.append(Op->op_begin(), Op->op_end());
+ return DAG.getNode(NewOp, DL, VT, Operands);
}
static bool resolveBuildVector(BuildVectorSDNode *BVN, APInt &CnstBits,
@@ -8788,6 +8793,12 @@ static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS,
SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op,
SelectionDAG &DAG) const {
+ if (Op.getValueType().isScalableVector()) {
+ if (Op.getOperand(0).getValueType().isFloatingPoint())
+ return Op;
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::SETCC_PRED);
+ }
+
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
@@ -11291,8 +11302,7 @@ static SDValue LowerSVEIntrinsicEXT(SDNode *N, SelectionDAG &DAG) {
return DAG.getNode(ISD::BITCAST, dl, VT, EXT);
}
-static SDValue tryConvertSVEWideCompare(SDNode *N, unsigned ReplacementIID,
- bool Invert,
+static SDValue tryConvertSVEWideCompare(SDNode *N, ISD::CondCode CC,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
if (DCI.isBeforeLegalize())
@@ -11346,17 +11356,8 @@ static SDValue tryConvertSVEWideCompare(SDNode *N, unsigned ReplacementIID,
}
SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, DL, CmpVT, Imm);
- SDValue ID = DAG.getTargetConstant(ReplacementIID, DL, MVT::i64);
- SDValue Op0, Op1;
- if (Invert) {
- Op0 = Splat;
- Op1 = N->getOperand(2);
- } else {
- Op0 = N->getOperand(2);
- Op1 = Splat;
- }
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
- ID, Pred, Op0, Op1);
+ return DAG.getNode(AArch64ISD::SETCC_PRED, DL, VT, Pred, N->getOperand(2),
+ Splat, DAG.getCondCode(CC));
}
return SDValue();
@@ -11530,6 +11531,42 @@ static SDValue performIntrinsicCombine(SDNode *N,
case Intrinsic::aarch64_sve_asr:
return DAG.getNode(AArch64ISD::SRA_PRED, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2), N->getOperand(3));
+ case Intrinsic::aarch64_sve_cmphs:
+ if (!N->getOperand(2).getValueType().isFloatingPoint())
+ return DAG.getNode(AArch64ISD::SETCC_PRED, SDLoc(N), N->getValueType(0),
+ N->getOperand(1), N->getOperand(2), N->getOperand(3),
+ DAG.getCondCode(ISD::SETUGE));
+ break;
+ case Intrinsic::aarch64_sve_cmphi:
+ if (!N->getOperand(2).getValueType().isFloatingPoint())
+ return DAG.getNode(AArch64ISD::SETCC_PRED, SDLoc(N), N->getValueType(0),
+ N->getOperand(1), N->getOperand(2), N->getOperand(3),
+ DAG.getCondCode(ISD::SETUGT));
+ break;
+ case Intrinsic::aarch64_sve_cmpge:
+ if (!N->getOperand(2).getValueType().isFloatingPoint())
+ return DAG.getNode(AArch64ISD::SETCC_PRED, SDLoc(N), N->getValueType(0),
+ N->getOperand(1), N->getOperand(2), N->getOperand(3),
+ DAG.getCondCode(ISD::SETGE));
+ break;
+ case Intrinsic::aarch64_sve_cmpgt:
+ if (!N->getOperand(2).getValueType().isFloatingPoint())
+ return DAG.getNode(AArch64ISD::SETCC_PRED, SDLoc(N), N->getValueType(0),
+ N->getOperand(1), N->getOperand(2), N->getOperand(3),
+ DAG.getCondCode(ISD::SETGT));
+ break;
+ case Intrinsic::aarch64_sve_cmpeq:
+ if (!N->getOperand(2).getValueType().isFloatingPoint())
+ return DAG.getNode(AArch64ISD::SETCC_PRED, SDLoc(N), N->getValueType(0),
+ N->getOperand(1), N->getOperand(2), N->getOperand(3),
+ DAG.getCondCode(ISD::SETEQ));
+ break;
+ case Intrinsic::aarch64_sve_cmpne:
+ if (!N->getOperand(2).getValueType().isFloatingPoint())
+ return DAG.getNode(AArch64ISD::SETCC_PRED, SDLoc(N), N->getValueType(0),
+ N->getOperand(1), N->getOperand(2), N->getOperand(3),
+ DAG.getCondCode(ISD::SETNE));
+ break;
case Intrinsic::aarch64_sve_fadda:
return combineSVEReductionOrderedFP(N, AArch64ISD::FADDA_PRED, DAG);
case Intrinsic::aarch64_sve_faddv:
@@ -11546,35 +11583,25 @@ static SDValue performIntrinsicCombine(SDNode *N,
return DAG.getNode(ISD::VSELECT, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2), N->getOperand(3));
case Intrinsic::aarch64_sve_cmpeq_wide:
- return tryConvertSVEWideCompare(N, Intrinsic::aarch64_sve_cmpeq,
- false, DCI, DAG);
+ return tryConvertSVEWideCompare(N, ISD::SETEQ, DCI, DAG);
case Intrinsic::aarch64_sve_cmpne_wide:
- return tryConvertSVEWideCompare(N, Intrinsic::aarch64_sve_cmpne,
- false, DCI, DAG);
+ return tryConvertSVEWideCompare(N, ISD::SETNE, DCI, DAG);
case Intrinsic::aarch64_sve_cmpge_wide:
- return tryConvertSVEWideCompare(N, Intrinsic::aarch64_sve_cmpge,
- false, DCI, DAG);
+ return tryConvertSVEWideCompare(N, ISD::SETGE, DCI, DAG);
case Intrinsic::aarch64_sve_cmpgt_wide:
- return tryConvertSVEWideCompare(N, Intrinsic::aarch64_sve_cmpgt,
- false, DCI, DAG);
+ return tryConvertSVEWideCompare(N, ISD::SETGT, DCI, DAG);
case Intrinsic::aarch64_sve_cmplt_wide:
- return tryConvertSVEWideCompare(N, Intrinsic::aarch64_sve_cmpgt,
- true, DCI, DAG);
+ return tryConvertSVEWideCompare(N, ISD::SETLT, DCI, DAG);
case Intrinsic::aarch64_sve_cmple_wide:
- return tryConvertSVEWideCompare(N, Intrinsic::aarch64_sve_cmpge,
- true, DCI, DAG);
+ return tryConvertSVEWideCompare(N, ISD::SETLE, DCI, DAG);
case Intrinsic::aarch64_sve_cmphs_wide:
- return tryConvertSVEWideCompare(N, Intrinsic::aarch64_sve_cmphs,
- false, DCI, DAG);
+ return tryConvertSVEWideCompare(N, ISD::SETUGE, DCI, DAG);
case Intrinsic::aarch64_sve_cmphi_wide:
- return tryConvertSVEWideCompare(N, Intrinsic::aarch64_sve_cmphi,
- false, DCI, DAG);
+ return tryConvertSVEWideCompare(N, ISD::SETUGT, DCI, DAG);
case Intrinsic::aarch64_sve_cmplo_wide:
- return tryConvertSVEWideCompare(N, Intrinsic::aarch64_sve_cmphi, true,
- DCI, DAG);
+ return tryConvertSVEWideCompare(N, ISD::SETULT, DCI, DAG);
case Intrinsic::aarch64_sve_cmpls_wide:
- return tryConvertSVEWideCompare(N, Intrinsic::aarch64_sve_cmphs, true,
- DCI, DAG);
+ return tryConvertSVEWideCompare(N, ISD::SETULE, DCI, DAG);
case Intrinsic::aarch64_sve_ptest_any:
return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
AArch64CC::ANY_ACTIVE);
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 1e0c875da327..2ae86b665761 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -62,6 +62,7 @@ enum NodeType : unsigned {
SHL_PRED,
SRL_PRED,
SRA_PRED,
+ SETCC_PRED,
// Arithmetic instructions which write flags.
ADDS,
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 8c0c95bd124a..81de64757e87 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1005,12 +1005,12 @@ multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instructio
defm TRN1_PPP : sve_int_perm_bin_perm_pp<0b100, "trn1", AArch64trn1>;
defm TRN2_PPP : sve_int_perm_bin_perm_pp<0b101, "trn2", AArch64trn2>;
- defm CMPHS_PPzZZ : sve_int_cmp_0<0b000, "cmphs", int_aarch64_sve_cmphs, SETUGE>;
- defm CMPHI_PPzZZ : sve_int_cmp_0<0b001, "cmphi", int_aarch64_sve_cmphi, SETUGT>;
- defm CMPGE_PPzZZ : sve_int_cmp_0<0b100, "cmpge", int_aarch64_sve_cmpge, SETGE>;
- defm CMPGT_PPzZZ : sve_int_cmp_0<0b101, "cmpgt", int_aarch64_sve_cmpgt, SETGT>;
- defm CMPEQ_PPzZZ : sve_int_cmp_0<0b110, "cmpeq", int_aarch64_sve_cmpeq, SETEQ>;
- defm CMPNE_PPzZZ : sve_int_cmp_0<0b111, "cmpne", int_aarch64_sve_cmpne, SETNE>;
+ defm CMPHS_PPzZZ : sve_int_cmp_0<0b000, "cmphs", SETUGE, SETULE>;
+ defm CMPHI_PPzZZ : sve_int_cmp_0<0b001, "cmphi", SETUGT, SETULT>;
+ defm CMPGE_PPzZZ : sve_int_cmp_0<0b100, "cmpge", SETGE, SETLE>;
+ defm CMPGT_PPzZZ : sve_int_cmp_0<0b101, "cmpgt", SETGT, SETLT>;
+ defm CMPEQ_PPzZZ : sve_int_cmp_0<0b110, "cmpeq", SETEQ, SETEQ>;
+ defm CMPNE_PPzZZ : sve_int_cmp_0<0b111, "cmpne", SETNE, SETNE>;
defm CMPEQ_WIDE_PPzZZ : sve_int_cmp_0_wide<0b010, "cmpeq", int_aarch64_sve_cmpeq_wide>;
defm CMPNE_WIDE_PPzZZ : sve_int_cmp_0_wide<0b011, "cmpne", int_aarch64_sve_cmpne_wide>;
@@ -1023,16 +1023,16 @@ multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instructio
defm CMPLO_WIDE_PPzZZ : sve_int_cmp_1_wide<0b110, "cmplo", int_aarch64_sve_cmplo_wide>;
defm CMPLS_WIDE_PPzZZ : sve_int_cmp_1_wide<0b111, "cmpls", int_aarch64_sve_cmpls_wide>;
- defm CMPGE_PPzZI : sve_int_scmp_vi<0b000, "cmpge", SETGE, int_aarch64_sve_cmpge>;
- defm CMPGT_PPzZI : sve_int_scmp_vi<0b001, "cmpgt", SETGT, int_aarch64_sve_cmpgt>;
- defm CMPLT_PPzZI : sve_int_scmp_vi<0b010, "cmplt", SETLT, null_frag, int_aarch64_sve_cmpgt>;
- defm CMPLE_PPzZI : sve_int_scmp_vi<0b011, "cmple", SETLE, null_frag, int_aarch64_sve_cmpge>;
- defm CMPEQ_PPzZI : sve_int_scmp_vi<0b100, "cmpeq", SETEQ, int_aarch64_sve_cmpeq>;
- defm CMPNE_PPzZI : sve_int_scmp_vi<0b101, "cmpne", SETNE, int_aarch64_sve_cmpne>;
- defm CMPHS_PPzZI : sve_int_ucmp_vi<0b00, "cmphs", SETUGE, int_aarch64_sve_cmphs>;
- defm CMPHI_PPzZI : sve_int_ucmp_vi<0b01, "cmphi", SETUGT, int_aarch64_sve_cmphi>;
- defm CMPLO_PPzZI : sve_int_ucmp_vi<0b10, "cmplo", SETULT, null_frag, int_aarch64_sve_cmphi>;
- defm CMPLS_PPzZI : sve_int_ucmp_vi<0b11, "cmpls", SETULE, null_frag, int_aarch64_sve_cmphs>;
+ defm CMPGE_PPzZI : sve_int_scmp_vi<0b000, "cmpge", SETGE, SETLE>;
+ defm CMPGT_PPzZI : sve_int_scmp_vi<0b001, "cmpgt", SETGT, SETLT>;
+ defm CMPLT_PPzZI : sve_int_scmp_vi<0b010, "cmplt", SETLT, SETGT>;
+ defm CMPLE_PPzZI : sve_int_scmp_vi<0b011, "cmple", SETLE, SETGE>;
+ defm CMPEQ_PPzZI : sve_int_scmp_vi<0b100, "cmpeq", SETEQ, SETEQ>;
+ defm CMPNE_PPzZI : sve_int_scmp_vi<0b101, "cmpne", SETNE, SETEQ>;
+ defm CMPHS_PPzZI : sve_int_ucmp_vi<0b00, "cmphs", SETUGE, SETULE>;
+ defm CMPHI_PPzZI : sve_int_ucmp_vi<0b01, "cmphi", SETUGT, SETULT>;
+ defm CMPLO_PPzZI : sve_int_ucmp_vi<0b10, "cmplo", SETULT, SETUGT>;
+ defm CMPLS_PPzZI : sve_int_ucmp_vi<0b11, "cmpls", SETULE, SETUGE>;
defm FCMGE_PPzZZ : sve_fp_3op_p_pd_cc<0b000, "fcmge", int_aarch64_sve_fcmpge, setoge>;
defm FCMGT_PPzZZ : sve_fp_3op_p_pd_cc<0b001, "fcmgt", int_aarch64_sve_fcmpgt, setogt>;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 48b3d8a3199a..e874edbb5fe2 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -10,6 +10,14 @@
//
//===----------------------------------------------------------------------===//
+def SDT_AArch64Setcc : SDTypeProfile<1, 4, [
+ SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVec<3>,
+ SDTCVecEltisVT<0, i1>, SDTCVecEltisVT<1, i1>, SDTCisSameAs<2, 3>,
+ SDTCisVT<4, OtherVT>
+]>;
+
+def AArch64setcc_pred : SDNode<"AArch64ISD::SETCC_PRED", SDT_AArch64Setcc>;
+
def SVEPatternOperand : AsmOperandClass {
let Name = "SVEPattern";
let ParserMethod = "tryParseSVEPattern";
@@ -4147,17 +4155,24 @@ class sve_int_cmp<bit cmp_1, bits<2> sz8_64, bits<3> opc, string asm,
let Defs = [NZCV];
}
-multiclass sve_int_cmp_0<bits<3> opc, string asm, SDPatternOperator op,
- CondCode cc> {
+multiclass SVE_SETCC_Pat<CondCode cc, CondCode invcc, ValueType predvt,
+ ValueType intvt, sve_int_cmp cmp> {
+ def : Pat<(predvt (AArch64setcc_pred predvt:$Op1, intvt:$Op2, intvt:$Op3, cc)),
+ (cmp $Op1, $Op2, $Op3)>;
+ def : Pat<(predvt (AArch64setcc_pred predvt:$Op1, intvt:$Op2, intvt:$Op3, invcc)),
+ (cmp $Op1, $Op3, $Op2)>;
+}
+
+multiclass sve_int_cmp_0<bits<3> opc, string asm, CondCode cc, CondCode invcc> {
def _B : sve_int_cmp<0b0, 0b00, opc, asm, PPR8, ZPR8, ZPR8>;
def _H : sve_int_cmp<0b0, 0b01, opc, asm, PPR16, ZPR16, ZPR16>;
def _S : sve_int_cmp<0b0, 0b10, opc, asm, PPR32, ZPR32, ZPR32>;
def _D : sve_int_cmp<0b0, 0b11, opc, asm, PPR64, ZPR64, ZPR64>;
- def : SVE_3_Op_Pat<nxv16i1, op, nxv16i1, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
- def : SVE_3_Op_Pat<nxv8i1, op, nxv8i1, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
- def : SVE_3_Op_Pat<nxv4i1, op, nxv4i1, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
- def : SVE_3_Op_Pat<nxv2i1, op, nxv2i1, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
+ defm : SVE_SETCC_Pat<cc, invcc, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
+ defm : SVE_SETCC_Pat<cc, invcc, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ defm : SVE_SETCC_Pat<cc, invcc, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ defm : SVE_SETCC_Pat<cc, invcc, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
multiclass sve_int_cmp_0_wide<bits<3> opc, string asm, SDPatternOperator op> {
@@ -4212,67 +4227,35 @@ class sve_int_scmp_vi<bits<2> sz8_64, bits<3> opc, string asm, PPRRegOp pprty,
let ElementSize = pprty.ElementSize;
}
-multiclass sve_int_scmp_vi<bits<3> opc, string asm, CondCode cc,
- SDPatternOperator op = null_frag,
- SDPatternOperator inv_op = null_frag> {
+multiclass SVE_SETCC_Imm_Pat<CondCode cc, CondCode commuted_cc,
+ ValueType predvt, ValueType intvt,
+ Operand immtype, Instruction cmp> {
+ def : Pat<(predvt (AArch64setcc_pred (predvt PPR_3b:$Pg),
+ (intvt ZPR:$Zs1),
+ (intvt (AArch64dup (immtype:$imm))),
+ cc)),
+ (cmp $Pg, $Zs1, immtype:$imm)>;
+ def : Pat<(predvt (AArch64setcc_pred (predvt PPR_3b:$Pg),
+ (intvt (AArch64dup (immtype:$imm))),
+ (intvt ZPR:$Zs1),
+ commuted_cc)),
+ (cmp $Pg, $Zs1, immtype:$imm)>;
+}
+
+multiclass sve_int_scmp_vi<bits<3> opc, string asm, CondCode cc, CondCode commuted_cc> {
def _B : sve_int_scmp_vi<0b00, opc, asm, PPR8, ZPR8, simm5_32b>;
def _H : sve_int_scmp_vi<0b01, opc, asm, PPR16, ZPR16, simm5_32b>;
def _S : sve_int_scmp_vi<0b10, opc, asm, PPR32, ZPR32, simm5_32b>;
def _D : sve_int_scmp_vi<0b11, opc, asm, PPR64, ZPR64, simm5_64b>;
- // IR version
- def : Pat<(nxv16i1 (setcc (nxv16i8 ZPR:$Zs1),
- (nxv16i8 (AArch64dup (simm5_32b:$imm))),
- cc)),
- (!cast<Instruction>(NAME # "_B") (PTRUE_B 31), ZPR:$Zs1, simm5_32b:$imm)>;
- def : Pat<(nxv8i1 (setcc (nxv8i16 ZPR:$Zs1),
- (nxv8i16 (AArch64dup (simm5_32b:$imm))),
- cc)),
- (!cast<Instruction>(NAME # "_H") (PTRUE_H 31), ZPR:$Zs1, simm5_32b:$imm)>;
- def : Pat<(nxv4i1 (setcc (nxv4i32 ZPR:$Zs1),
- (nxv4i32 (AArch64dup (simm5_32b:$imm))),
- cc)),
- (!cast<Instruction>(NAME # "_S") (PTRUE_S 31), ZPR:$Zs1, simm5_32b:$imm)>;
- def : Pat<(nxv2i1 (setcc (nxv2i64 ZPR:$Zs1),
- (nxv2i64 (AArch64dup (simm5_64b:$imm))),
- cc)),
- (!cast<Instruction>(NAME # "_D") (PTRUE_D 31), ZPR:$Zs1, simm5_64b:$imm)>;
-
- // Intrinsic version
- def : Pat<(nxv16i1 (op (nxv16i1 PPR_3b:$Pg),
- (nxv16i8 ZPR:$Zs1),
- (nxv16i8 (AArch64dup (simm5_32b:$imm))))),
- (!cast<Instruction>(NAME # "_B") PPR_3b:$Pg, ZPR:$Zs1, simm5_32b:$imm)>;
- def : Pat<(nxv8i1 (op (nxv8i1 PPR_3b:$Pg),
- (nxv8i16 ZPR:$Zs1),
- (nxv8i16 (AArch64dup (simm5_32b:$imm))))),
- (!cast<Instruction>(NAME # "_H") PPR_3b:$Pg, ZPR:$Zs1, simm5_32b:$imm)>;
- def : Pat<(nxv4i1 (op (nxv4i1 PPR_3b:$Pg),
- (nxv4i32 ZPR:$Zs1),
- (nxv4i32 (AArch64dup (simm5_32b:$imm))))),
- (!cast<Instruction>(NAME # "_S") PPR_3b:$Pg, ZPR:$Zs1, simm5_32b:$imm)>;
- def : Pat<(nxv2i1 (op (nxv2i1 PPR_3b:$Pg),
- (nxv2i64 ZPR:$Zs1),
- (nxv2i64 (AArch64dup (simm5_64b:$imm))))),
- (!cast<Instruction>(NAME # "_D") PPR_3b:$Pg, ZPR:$Zs1, simm5_64b:$imm)>;
-
- // Inverted intrinsic version
- def : Pat<(nxv16i1 (inv_op (nxv16i1 PPR_3b:$Pg),
- (nxv16i8 (AArch64dup (simm5_32b:$imm))),
- (nxv16i8 ZPR:$Zs1))),
- (!cast<Instruction>(NAME # "_B") PPR_3b:$Pg, ZPR:$Zs1, simm5_32b:$imm)>;
- def : Pat<(nxv8i1 (inv_op (nxv8i1 PPR_3b:$Pg),
- (nxv8i16 (AArch64dup (simm5_32b:$imm))),
- (nxv8i16 ZPR:$Zs1))),
- (!cast<Instruction>(NAME # "_H") PPR_3b:$Pg, ZPR:$Zs1, simm5_32b:$imm)>;
- def : Pat<(nxv4i1 (inv_op (nxv4i1 PPR_3b:$Pg),
- (nxv4i32 (AArch64dup (simm5_32b:$imm))),
- (nxv4i32 ZPR:$Zs1))),
- (!cast<Instruction>(NAME # "_S") PPR_3b:$Pg, ZPR:$Zs1, simm5_32b:$imm)>;
- def : Pat<(nxv2i1 (inv_op (nxv2i1 PPR_3b:$Pg),
- (nxv2i64 (AArch64dup (simm5_64b:$imm))),
- (nxv2i64 ZPR:$Zs1))),
- (!cast<Instruction>(NAME # "_D") PPR_3b:$Pg, ZPR:$Zs1, simm5_64b:$imm)>;
+ defm : SVE_SETCC_Imm_Pat<cc, commuted_cc, nxv16i1, nxv16i8, simm5_32b,
+ !cast<Instruction>(NAME # _B)>;
+ defm : SVE_SETCC_Imm_Pat<cc, commuted_cc, nxv8i1, nxv8i16, simm5_32b,
+ !cast<Instruction>(NAME # _H)>;
+ defm : SVE_SETCC_Imm_Pat<cc, commuted_cc, nxv4i1, nxv4i32, simm5_32b,
+ !cast<Instruction>(NAME # _S)>;
+ defm : SVE_SETCC_Imm_Pat<cc, commuted_cc, nxv2i1, nxv2i64, simm5_64b,
+ !cast<Instruction>(NAME # _D)>;
}
@@ -4304,66 +4287,20 @@ class sve_int_ucmp_vi<bits<2> sz8_64, bits<2> opc, string asm, PPRRegOp pprty,
}
multiclass sve_int_ucmp_vi<bits<2> opc, string asm, CondCode cc,
- SDPatternOperator op = null_frag,
- SDPatternOperator inv_op = null_frag> {
+ CondCode commuted_cc> {
def _B : sve_int_ucmp_vi<0b00, opc, asm, PPR8, ZPR8, imm0_127>;
def _H : sve_int_ucmp_vi<0b01, opc, asm, PPR16, ZPR16, imm0_127>;
def _S : sve_int_ucmp_vi<0b10, opc, asm, PPR32, ZPR32, imm0_127>;
def _D : sve_int_ucmp_vi<0b11, opc, asm, PPR64, ZPR64, imm0_127_64b>;
- // IR version
- def : Pat<(nxv16i1 (setcc (nxv16i8 ZPR:$Zs1),
- (nxv16i8 (AArch64dup (imm0_127:$imm))),
- cc)),
- (!cast<Instruction>(NAME # "_B") (PTRUE_B 31), ZPR:$Zs1, imm0_127:$imm)>;
- def : Pat<(nxv8i1 (setcc (nxv8i16 ZPR:$Zs1),
- (nxv8i16 (AArch64dup (imm0_127:$imm))),
- cc)),
- (!cast<Instruction>(NAME # "_H") (PTRUE_H 31), ZPR:$Zs1, imm0_127:$imm)>;
- def : Pat<(nxv4i1 (setcc (nxv4i32 ZPR:$Zs1),
- (nxv4i32 (AArch64dup (imm0_127:$imm))),
- cc)),
- (!cast<Instruction>(NAME # "_S") (PTRUE_S 31), ZPR:$Zs1, imm0_127:$imm)>;
- def : Pat<(nxv2i1 (setcc (nxv2i64 ZPR:$Zs1),
- (nxv2i64 (AArch64dup (imm0_127_64b:$imm))),
- cc)),
- (!cast<Instruction>(NAME # "_D") (PTRUE_D 31), ZPR:$Zs1, imm0_127_64b:$imm)>;
-
- // Intrinsic version
- def : Pat<(nxv16i1 (op (nxv16i1 PPR_3b:$Pg),
- (nxv16i8 ZPR:$Zs1),
- (nxv16i8 (AArch64dup (imm0_127:$imm))))),
- (!cast<Instruction>(NAME # "_B") PPR_3b:$Pg, ZPR:$Zs1, imm0_127:$imm)>;
- def : Pat<(nxv8i1 (op (nxv8i1 PPR_3b:$Pg),
- (nxv8i16 ZPR:$Zs1),
- (nxv8i16 (AArch64dup (imm0_127:$imm))))),
- (!cast<Instruction>(NAME # "_H") PPR_3b:$Pg, ZPR:$Zs1, imm0_127:$imm)>;
- def : Pat<(nxv4i1 (op (nxv4i1 PPR_3b:$Pg),
- (nxv4i32 ZPR:$Zs1),
- (nxv4i32 (AArch64dup (imm0_127:$imm))))),
- (!cast<Instruction>(NAME # "_S") PPR_3b:$Pg, ZPR:$Zs1, imm0_127:$imm)>;
- def : Pat<(nxv2i1 (op (nxv2i1 PPR_3b:$Pg),
- (nxv2i64 ZPR:$Zs1),
- (nxv2i64 (AArch64dup (imm0_127_64b:$imm))))),
- (!cast<Instruction>(NAME # "_D") PPR_3b:$Pg, ZPR:$Zs1, imm0_127_64b:$imm)>;
-
- // Inverted intrinsic version
- def : Pat<(nxv16i1 (inv_op (nxv16i1 PPR_3b:$Pg),
- (nxv16i8 (AArch64dup (imm0_127:$imm))),
- (nxv16i8 ZPR:$Zs1))),
- (!cast<Instruction>(NAME # "_B") PPR_3b:$Pg, ZPR:$Zs1, imm0_127:$imm)>;
- def : Pat<(nxv8i1 (inv_op (nxv8i1 PPR_3b:$Pg),
- (nxv8i16 (AArch64dup (imm0_127:$imm))),
- (nxv8i16 ZPR:$Zs1))),
- (!cast<Instruction>(NAME # "_H") PPR_3b:$Pg, ZPR:$Zs1, imm0_127:$imm)>;
- def : Pat<(nxv4i1 (inv_op (nxv4i1 PPR_3b:$Pg),
- (nxv4i32 (AArch64dup (imm0_127:$imm))),
- (nxv4i32 ZPR:$Zs1))),
- (!cast<Instruction>(NAME # "_S") PPR_3b:$Pg, ZPR:$Zs1, imm0_127:$imm)>;
- def : Pat<(nxv2i1 (inv_op (nxv2i1 PPR_3b:$Pg),
- (nxv2i64 (AArch64dup (imm0_127_64b:$imm))),
- (nxv2i64 ZPR:$Zs1))),
- (!cast<Instruction>(NAME # "_D") PPR_3b:$Pg, ZPR:$Zs1, imm0_127_64b:$imm)>;
+ defm : SVE_SETCC_Imm_Pat<cc, commuted_cc, nxv16i1, nxv16i8, imm0_127,
+ !cast<Instruction>(NAME # _B)>;
+ defm : SVE_SETCC_Imm_Pat<cc, commuted_cc, nxv8i1, nxv8i16, imm0_127,
+ !cast<Instruction>(NAME # _H)>;
+ defm : SVE_SETCC_Imm_Pat<cc, commuted_cc, nxv4i1, nxv4i32, imm0_127,
+ !cast<Instruction>(NAME # _S)>;
+ defm : SVE_SETCC_Imm_Pat<cc, commuted_cc, nxv2i1, nxv2i64, imm0_127_64b,
+ !cast<Instruction>(NAME # _D)>;
}
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-compares.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-compares.ll
index 2a4984f6e245..824ce5a1ef60 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-compares.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-compares.ll
@@ -74,6 +74,42 @@ define <vscale x 4 x i1> @cmpeq_wide_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32>
ret <vscale x 4 x i1> %out
}
+define <vscale x 16 x i1> @cmpeq_ir_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: cmpeq_ir_b:
+; CHECK: ptrue p0.b
+; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: ret
+ %out = icmp eq <vscale x 16 x i8> %a, %b
+ ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 8 x i1> @cmpeq_ir_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: cmpeq_ir_h:
+; CHECK: ptrue p0.h
+; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, z1.h
+; CHECK-NEXT: ret
+ %out = icmp eq <vscale x 8 x i16> %a, %b
+ ret <vscale x 8 x i1> %out
+}
+
+define <vscale x 4 x i1> @cmpeq_ir_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: cmpeq_ir_s:
+; CHECK: ptrue p0.s
+; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: ret
+ %out = icmp eq <vscale x 4 x i32> %a, %b
+ ret <vscale x 4 x i1> %out
+}
+
+define <vscale x 2 x i1> @cmpeq_ir_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: cmpeq_ir_d:
+; CHECK: ptrue p0.d
+; CHECK-NEXT: cmpeq p0.d, p0/z, z0.d, z1.d
+; CHECK-NEXT: ret
+ %out = icmp eq <vscale x 2 x i64> %a, %b
+ ret <vscale x 2 x i1> %out
+}
+
;
; CMPGE
;
@@ -148,6 +184,78 @@ define <vscale x 4 x i1> @cmpge_wide_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32>
ret <vscale x 4 x i1> %out
}
+define <vscale x 16 x i1> @cmpge_ir_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: cmpge_ir_b:
+; CHECK: ptrue p0.b
+; CHECK-NEXT: cmpge p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: ret
+ %out = icmp sge <vscale x 16 x i8> %a, %b
+ ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 8 x i1> @cmpge_ir_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: cmpge_ir_h:
+; CHECK: ptrue p0.h
+; CHECK-NEXT: cmpge p0.h, p0/z, z0.h, z1.h
+; CHECK-NEXT: ret
+ %out = icmp sge <vscale x 8 x i16> %a, %b
+ ret <vscale x 8 x i1> %out
+}
+
+define <vscale x 4 x i1> @cmpge_ir_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: cmpge_ir_s:
+; CHECK: ptrue p0.s
+; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: ret
+ %out = icmp sge <vscale x 4 x i32> %a, %b
+ ret <vscale x 4 x i1> %out
+}
+
+define <vscale x 2 x i1> @cmpge_ir_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: cmpge_ir_d:
+; CHECK: ptrue p0.d
+; CHECK-NEXT: cmpge p0.d, p0/z, z0.d, z1.d
+; CHECK-NEXT: ret
+ %out = icmp sge <vscale x 2 x i64> %a, %b
+ ret <vscale x 2 x i1> %out
+}
+
+define <vscale x 16 x i1> @cmpge_ir_comm_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: cmpge_ir_comm_b:
+; CHECK: ptrue p0.b
+; CHECK-NEXT: cmpge p0.b, p0/z, z1.b, z0.b
+; CHECK-NEXT: ret
+ %out = icmp sle <vscale x 16 x i8> %a, %b
+ ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 8 x i1> @cmpge_ir_comm_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: cmpge_ir_comm_h:
+; CHECK: ptrue p0.h
+; CHECK-NEXT: cmpge p0.h, p0/z, z1.h, z0.h
+; CHECK-NEXT: ret
+ %out = icmp sle <vscale x 8 x i16> %a, %b
+ ret <vscale x 8 x i1> %out
+}
+
+define <vscale x 4 x i1> @cmpge_ir_comm_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: cmpge_ir_comm_s:
+; CHECK: ptrue p0.s
+; CHECK-NEXT: cmpge p0.s, p0/z, z1.s, z0.s
+; CHECK-NEXT: ret
+ %out = icmp sle <vscale x 4 x i32> %a, %b
+ ret <vscale x 4 x i1> %out
+}
+
+define <vscale x 2 x i1> @cmpge_ir_comm_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: cmpge_ir_comm_d:
+; CHECK: ptrue p0.d
+; CHECK-NEXT: cmpge p0.d, p0/z, z1.d, z0.d
+; CHECK-NEXT: ret
+ %out = icmp sle <vscale x 2 x i64> %a, %b
+ ret <vscale x 2 x i1> %out
+}
+
;
; CMPGT
;
@@ -222,6 +330,78 @@ define <vscale x 4 x i1> @cmpgt_wide_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32>
ret <vscale x 4 x i1> %out
}
+define <vscale x 16 x i1> @cmpgt_ir_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: cmpgt_ir_b:
+; CHECK: ptrue p0.b
+; CHECK-NEXT: cmpgt p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: ret
+ %out = icmp sgt <vscale x 16 x i8> %a, %b
+ ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 8 x i1> @cmpgt_ir_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: cmpgt_ir_h:
+; CHECK: ptrue p0.h
+; CHECK-NEXT: cmpgt p0.h, p0/z, z0.h, z1.h
+; CHECK-NEXT: ret
+ %out = icmp sgt <vscale x 8 x i16> %a, %b
+ ret <vscale x 8 x i1> %out
+}
+
+define <vscale x 4 x i1> @cmpgt_ir_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: cmpgt_ir_s:
+; CHECK: ptrue p0.s
+; CHECK-NEXT: cmpgt p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: ret
+ %out = icmp sgt <vscale x 4 x i32> %a, %b
+ ret <vscale x 4 x i1> %out
+}
+
+define <vscale x 2 x i1> @cmpgt_ir_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: cmpgt_ir_d:
+; CHECK: ptrue p0.d
+; CHECK-NEXT: cmpgt p0.d, p0/z, z0.d, z1.d
+; CHECK-NEXT: ret
+ %out = icmp sgt <vscale x 2 x i64> %a, %b
+ ret <vscale x 2 x i1> %out
+}
+
+define <vscale x 16 x i1> @cmpgt_ir_comm_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: cmpgt_ir_comm_b:
+; CHECK: ptrue p0.b
+; CHECK-NEXT: cmpgt p0.b, p0/z, z1.b, z0.b
+; CHECK-NEXT: ret
+ %out = icmp slt <vscale x 16 x i8> %a, %b
+ ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 8 x i1> @cmpgt_ir_comm_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: cmpgt_ir_comm_h:
+; CHECK: ptrue p0.h
+; CHECK-NEXT: cmpgt p0.h, p0/z, z1.h, z0.h
+; CHECK-NEXT: ret
+ %out = icmp slt <vscale x 8 x i16> %a, %b
+ ret <vscale x 8 x i1> %out
+}
+
+define <vscale x 4 x i1> @cmpgt_ir_comm_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: cmpgt_ir_comm_s:
+; CHECK: ptrue p0.s
+; CHECK-NEXT: cmpgt p0.s, p0/z, z1.s, z0.s
+; CHECK-NEXT: ret
+ %out = icmp slt <vscale x 4 x i32> %a, %b
+ ret <vscale x 4 x i1> %out
+}
+
+define <vscale x 2 x i1> @cmpgt_ir_comm_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: cmpgt_ir_comm_d:
+; CHECK: ptrue p0.d
+; CHECK-NEXT: cmpgt p0.d, p0/z, z1.d, z0.d
+; CHECK-NEXT: ret
+ %out = icmp slt <vscale x 2 x i64> %a, %b
+ ret <vscale x 2 x i1> %out
+}
+
;
; CMPHI
;
@@ -296,6 +476,78 @@ define <vscale x 4 x i1> @cmphi_wide_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32>
ret <vscale x 4 x i1> %out
}
+define <vscale x 16 x i1> @cmphi_ir_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: cmphi_ir_b:
+; CHECK: ptrue p0.b
+; CHECK-NEXT: cmphi p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: ret
+ %out = icmp ugt <vscale x 16 x i8> %a, %b
+ ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 8 x i1> @cmphi_ir_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: cmphi_ir_h:
+; CHECK: ptrue p0.h
+; CHECK-NEXT: cmphi p0.h, p0/z, z0.h, z1.h
+; CHECK-NEXT: ret
+ %out = icmp ugt <vscale x 8 x i16> %a, %b
+ ret <vscale x 8 x i1> %out
+}
+
+define <vscale x 4 x i1> @cmphi_ir_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: cmphi_ir_s:
+; CHECK: ptrue p0.s
+; CHECK-NEXT: cmphi p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: ret
+ %out = icmp ugt <vscale x 4 x i32> %a, %b
+ ret <vscale x 4 x i1> %out
+}
+
+define <vscale x 2 x i1> @cmphi_ir_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: cmphi_ir_d:
+; CHECK: ptrue p0.d
+; CHECK-NEXT: cmphi p0.d, p0/z, z0.d, z1.d
+; CHECK-NEXT: ret
+ %out = icmp ugt <vscale x 2 x i64> %a, %b
+ ret <vscale x 2 x i1> %out
+}
+
+define <vscale x 16 x i1> @cmphi_ir_comm_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: cmphi_ir_comm_b:
+; CHECK: ptrue p0.b
+; CHECK-NEXT: cmphi p0.b, p0/z, z1.b, z0.b
+; CHECK-NEXT: ret
+ %out = icmp ult <vscale x 16 x i8> %a, %b
+ ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 8 x i1> @cmphi_ir_comm_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: cmphi_ir_comm_h:
+; CHECK: ptrue p0.h
+; CHECK-NEXT: cmphi p0.h, p0/z, z1.h, z0.h
+; CHECK-NEXT: ret
+ %out = icmp ult <vscale x 8 x i16> %a, %b
+ ret <vscale x 8 x i1> %out
+}
+
+define <vscale x 4 x i1> @cmphi_ir_comm_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: cmphi_ir_comm_s:
+; CHECK: ptrue p0.s
+; CHECK-NEXT: cmphi p0.s, p0/z, z1.s, z0.s
+; CHECK-NEXT: ret
+ %out = icmp ult <vscale x 4 x i32> %a, %b
+ ret <vscale x 4 x i1> %out
+}
+
+define <vscale x 2 x i1> @cmphi_ir_comm_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: cmphi_ir_comm_d:
+; CHECK: ptrue p0.d
+; CHECK-NEXT: cmphi p0.d, p0/z, z1.d, z0.d
+; CHECK-NEXT: ret
+ %out = icmp ult <vscale x 2 x i64> %a, %b
+ ret <vscale x 2 x i1> %out
+}
+
;
; CMPHS
;
@@ -370,6 +622,78 @@ define <vscale x 4 x i1> @cmphs_wide_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32>
ret <vscale x 4 x i1> %out
}
+define <vscale x 16 x i1> @cmphs_ir_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: cmphs_ir_b:
+; CHECK: ptrue p0.b
+; CHECK-NEXT: cmphs p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: ret
+ %out = icmp uge <vscale x 16 x i8> %a, %b
+ ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 8 x i1> @cmphs_ir_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: cmphs_ir_h:
+; CHECK: ptrue p0.h
+; CHECK-NEXT: cmphs p0.h, p0/z, z0.h, z1.h
+; CHECK-NEXT: ret
+ %out = icmp uge <vscale x 8 x i16> %a, %b
+ ret <vscale x 8 x i1> %out
+}
+
+define <vscale x 4 x i1> @cmphs_ir_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: cmphs_ir_s:
+; CHECK: ptrue p0.s
+; CHECK-NEXT: cmphs p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: ret
+ %out = icmp uge <vscale x 4 x i32> %a, %b
+ ret <vscale x 4 x i1> %out
+}
+
+define <vscale x 2 x i1> @cmphs_ir_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: cmphs_ir_d:
+; CHECK: ptrue p0.d
+; CHECK-NEXT: cmphs p0.d, p0/z, z0.d, z1.d
+; CHECK-NEXT: ret
+ %out = icmp uge <vscale x 2 x i64> %a, %b
+ ret <vscale x 2 x i1> %out
+}
+
+define <vscale x 16 x i1> @cmphs_ir_comm_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: cmphs_ir_comm_b:
+; CHECK: ptrue p0.b
+; CHECK-NEXT: cmphs p0.b, p0/z, z1.b, z0.b
+; CHECK-NEXT: ret
+ %out = icmp ule <vscale x 16 x i8> %a, %b
+ ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 8 x i1> @cmphs_ir_comm_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: cmphs_ir_comm_h:
+; CHECK: ptrue p0.h
+; CHECK-NEXT: cmphs p0.h, p0/z, z1.h, z0.h
+; CHECK-NEXT: ret
+ %out = icmp ule <vscale x 8 x i16> %a, %b
+ ret <vscale x 8 x i1> %out
+}
+
+define <vscale x 4 x i1> @cmphs_ir_comm_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: cmphs_ir_comm_s:
+; CHECK: ptrue p0.s
+; CHECK-NEXT: cmphs p0.s, p0/z, z1.s, z0.s
+; CHECK-NEXT: ret
+ %out = icmp ule <vscale x 4 x i32> %a, %b
+ ret <vscale x 4 x i1> %out
+}
+
+define <vscale x 2 x i1> @cmphs_ir_comm_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: cmphs_ir_comm_d:
+; CHECK: ptrue p0.d
+; CHECK-NEXT: cmphs p0.d, p0/z, z1.d, z0.d
+; CHECK-NEXT: ret
+ %out = icmp ule <vscale x 2 x i64> %a, %b
+ ret <vscale x 2 x i1> %out
+}
+
;
; CMPLE
;
@@ -580,6 +904,42 @@ define <vscale x 4 x i1> @cmpne_wide_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32>
ret <vscale x 4 x i1> %out
}
+define <vscale x 16 x i1> @cmpne_ir_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: cmpne_ir_b:
+; CHECK: ptrue p0.b
+; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: ret
+ %out = icmp ne <vscale x 16 x i8> %a, %b
+ ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 8 x i1> @cmpne_ir_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: cmpne_ir_h:
+; CHECK: ptrue p0.h
+; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, z1.h
+; CHECK-NEXT: ret
+ %out = icmp ne <vscale x 8 x i16> %a, %b
+ ret <vscale x 8 x i1> %out
+}
+
+define <vscale x 4 x i1> @cmpne_ir_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: cmpne_ir_s:
+; CHECK: ptrue p0.s
+; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: ret
+ %out = icmp ne <vscale x 4 x i32> %a, %b
+ ret <vscale x 4 x i1> %out
+}
+
+define <vscale x 2 x i1> @cmpne_ir_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: cmpne_ir_d:
+; CHECK: ptrue p0.d
+; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, z1.d
+; CHECK-NEXT: ret
+ %out = icmp ne <vscale x 2 x i64> %a, %b
+ ret <vscale x 2 x i1> %out
+}
+
declare <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i1> @llvm.aarch64.sve.cmpeq.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i1> @llvm.aarch64.sve.cmpeq.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)