[llvm] [LLVM][AArch64] Refactor lowering of fixed length integer setcc operations. (PR #132434)
Paul Walker via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 21 10:37:17 PDT 2025
https://github.com/paulwalker-arm created https://github.com/llvm/llvm-project/pull/132434
The original code is essentially performing isel during legalisation with the AArch64-specific nodes offering no additional value compared to ISD::SETCC.
Whilst not the motivating case, the effect of removing the indirection means global-isel no longer misses out on the custom handling.
If agreeable I hope to follow this with matching refactoring of the floating point based setcc operations.
>From 54a1631104f50df88ac42143aa3191cd98d1af3b Mon Sep 17 00:00:00 2001
From: Paul Walker <paul.walker at arm.com>
Date: Fri, 21 Mar 2025 13:10:37 +0000
Subject: [PATCH] [LLVM][AArch64] Refactor lowering of fixed length integer
setcc operations.
The original code is essentially performing isel during legalisation
with the AArch64-specific nodes offering no additional value compared
to ISD::SETCC.
Whilst not the motivating case, the effect of removing the indirection
means global-isel no longer misses out on the custom handling.
If agreeable I hope to follow this with matching refactoring of the
floating point based setcc operations.
---
.../Target/AArch64/AArch64ISelLowering.cpp | 130 +++-----
llvm/lib/Target/AArch64/AArch64ISelLowering.h | 10 -
.../lib/Target/AArch64/AArch64InstrFormats.td | 6 +-
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 36 ++-
llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll | 15 +-
.../test/CodeGen/AArch64/fptosi-sat-vector.ll | 6 +-
.../AArch64/neon-bitwise-instructions.ll | 18 +-
.../AArch64/neon-compare-instructions.ll | 301 +++++-------------
.../sve-fixed-length-extract-subvector.ll | 5 +-
.../AArch64/sve-fixed-length-masked-gather.ll | 4 +-
.../sve-fixed-length-masked-scatter.ll | 3 +-
.../AArch64/vec-combine-compare-to-bitmask.ll | 39 +--
12 files changed, 186 insertions(+), 387 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 0db6c614684d7..c8922c4a1d5a5 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -2057,6 +2057,15 @@ void AArch64TargetLowering::addTypeForNEON(MVT VT) {
setOperationAction(ISD::READ_REGISTER, MVT::i128, Custom);
setOperationAction(ISD::WRITE_REGISTER, MVT::i128, Custom);
}
+
+ if (VT.isInteger()) {
+ // Let common code emit inverted variants of compares we do support.
+ setCondCodeAction(ISD::SETNE, VT, Expand);
+ setCondCodeAction(ISD::SETLE, VT, Expand);
+ setCondCodeAction(ISD::SETLT, VT, Expand);
+ setCondCodeAction(ISD::SETULE, VT, Expand);
+ setCondCodeAction(ISD::SETULT, VT, Expand);
+ }
}
bool AArch64TargetLowering::shouldExpandGetActiveLaneMask(EVT ResVT,
@@ -2581,31 +2590,21 @@ unsigned AArch64TargetLowering::ComputeNumSignBitsForTargetNode(
unsigned VTBits = VT.getScalarSizeInBits();
unsigned Opcode = Op.getOpcode();
switch (Opcode) {
- case AArch64ISD::CMEQ:
- case AArch64ISD::CMGE:
- case AArch64ISD::CMGT:
- case AArch64ISD::CMHI:
- case AArch64ISD::CMHS:
- case AArch64ISD::FCMEQ:
- case AArch64ISD::FCMGE:
- case AArch64ISD::FCMGT:
- case AArch64ISD::CMEQz:
- case AArch64ISD::CMGEz:
- case AArch64ISD::CMGTz:
- case AArch64ISD::CMLEz:
- case AArch64ISD::CMLTz:
- case AArch64ISD::FCMEQz:
- case AArch64ISD::FCMGEz:
- case AArch64ISD::FCMGTz:
- case AArch64ISD::FCMLEz:
- case AArch64ISD::FCMLTz:
- // Compares return either 0 or all-ones
- return VTBits;
- case AArch64ISD::VASHR: {
- unsigned Tmp =
- DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
- return std::min<uint64_t>(Tmp + Op.getConstantOperandVal(1), VTBits);
- }
+ case AArch64ISD::FCMEQ:
+ case AArch64ISD::FCMGE:
+ case AArch64ISD::FCMGT:
+ case AArch64ISD::FCMEQz:
+ case AArch64ISD::FCMGEz:
+ case AArch64ISD::FCMGTz:
+ case AArch64ISD::FCMLEz:
+ case AArch64ISD::FCMLTz:
+ // Compares return either 0 or all-ones
+ return VTBits;
+ case AArch64ISD::VASHR: {
+ unsigned Tmp =
+ DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ return std::min<uint64_t>(Tmp + Op.getConstantOperandVal(1), VTBits);
+ }
}
return 1;
@@ -2812,19 +2811,9 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(AArch64ISD::VASHR)
MAKE_CASE(AArch64ISD::VSLI)
MAKE_CASE(AArch64ISD::VSRI)
- MAKE_CASE(AArch64ISD::CMEQ)
- MAKE_CASE(AArch64ISD::CMGE)
- MAKE_CASE(AArch64ISD::CMGT)
- MAKE_CASE(AArch64ISD::CMHI)
- MAKE_CASE(AArch64ISD::CMHS)
MAKE_CASE(AArch64ISD::FCMEQ)
MAKE_CASE(AArch64ISD::FCMGE)
MAKE_CASE(AArch64ISD::FCMGT)
- MAKE_CASE(AArch64ISD::CMEQz)
- MAKE_CASE(AArch64ISD::CMGEz)
- MAKE_CASE(AArch64ISD::CMGTz)
- MAKE_CASE(AArch64ISD::CMLEz)
- MAKE_CASE(AArch64ISD::CMLTz)
MAKE_CASE(AArch64ISD::FCMEQz)
MAKE_CASE(AArch64ISD::FCMGEz)
MAKE_CASE(AArch64ISD::FCMGTz)
@@ -15814,9 +15803,6 @@ static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS,
SplatBitSize, HasAnyUndefs);
bool IsZero = IsCnst && SplatValue == 0;
- bool IsOne =
- IsCnst && SrcVT.getScalarSizeInBits() == SplatBitSize && SplatValue == 1;
- bool IsMinusOne = IsCnst && SplatValue.isAllOnes();
if (SrcVT.getVectorElementType().isFloatingPoint()) {
switch (CC) {
@@ -15863,50 +15849,7 @@ static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS,
}
}
- switch (CC) {
- default:
- return SDValue();
- case AArch64CC::NE: {
- SDValue Cmeq;
- if (IsZero)
- Cmeq = DAG.getNode(AArch64ISD::CMEQz, dl, VT, LHS);
- else
- Cmeq = DAG.getNode(AArch64ISD::CMEQ, dl, VT, LHS, RHS);
- return DAG.getNOT(dl, Cmeq, VT);
- }
- case AArch64CC::EQ:
- if (IsZero)
- return DAG.getNode(AArch64ISD::CMEQz, dl, VT, LHS);
- return DAG.getNode(AArch64ISD::CMEQ, dl, VT, LHS, RHS);
- case AArch64CC::GE:
- if (IsZero)
- return DAG.getNode(AArch64ISD::CMGEz, dl, VT, LHS);
- return DAG.getNode(AArch64ISD::CMGE, dl, VT, LHS, RHS);
- case AArch64CC::GT:
- if (IsZero)
- return DAG.getNode(AArch64ISD::CMGTz, dl, VT, LHS);
- if (IsMinusOne)
- return DAG.getNode(AArch64ISD::CMGEz, dl, VT, LHS);
- return DAG.getNode(AArch64ISD::CMGT, dl, VT, LHS, RHS);
- case AArch64CC::LE:
- if (IsZero)
- return DAG.getNode(AArch64ISD::CMLEz, dl, VT, LHS);
- return DAG.getNode(AArch64ISD::CMGE, dl, VT, RHS, LHS);
- case AArch64CC::LS:
- return DAG.getNode(AArch64ISD::CMHS, dl, VT, RHS, LHS);
- case AArch64CC::LO:
- return DAG.getNode(AArch64ISD::CMHI, dl, VT, RHS, LHS);
- case AArch64CC::LT:
- if (IsZero)
- return DAG.getNode(AArch64ISD::CMLTz, dl, VT, LHS);
- if (IsOne)
- return DAG.getNode(AArch64ISD::CMLEz, dl, VT, LHS);
- return DAG.getNode(AArch64ISD::CMGT, dl, VT, RHS, LHS);
- case AArch64CC::HI:
- return DAG.getNode(AArch64ISD::CMHI, dl, VT, LHS, RHS);
- case AArch64CC::HS:
- return DAG.getNode(AArch64ISD::CMHS, dl, VT, LHS, RHS);
- }
+ return SDValue();
}
SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op,
@@ -15927,9 +15870,11 @@ SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op,
if (LHS.getValueType().getVectorElementType().isInteger()) {
assert(LHS.getValueType() == RHS.getValueType());
AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
- SDValue Cmp =
- EmitVectorComparison(LHS, RHS, AArch64CC, false, CmpVT, dl, DAG);
- return DAG.getSExtOrTrunc(Cmp, dl, Op.getValueType());
+ if (SDValue Cmp =
+ EmitVectorComparison(LHS, RHS, AArch64CC, false, CmpVT, dl, DAG))
+ return DAG.getSExtOrTrunc(Cmp, dl, Op.getValueType());
+
+ return Op;
}
// Lower isnan(x) | isnan(never-nan) to x != x.
@@ -18128,7 +18073,9 @@ static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG,
if (!ShiftAmt || ShiftAmt->getZExtValue() != ShiftEltTy.getSizeInBits() - 1)
return SDValue();
- return DAG.getNode(AArch64ISD::CMGEz, SDLoc(N), VT, Shift.getOperand(0));
+ SDLoc DL(N);
+ SDValue Zero = DAG.getConstant(0, DL, Shift.getValueType());
+ return DAG.getSetCC(DL, VT, Shift.getOperand(0), Zero, ISD::SETGE);
}
// Given a vecreduce_add node, detect the below pattern and convert it to the
@@ -18739,7 +18686,8 @@ static SDValue performMulVectorCmpZeroCombine(SDNode *N, SelectionDAG &DAG) {
SDLoc DL(N);
SDValue In = DAG.getNode(AArch64ISD::NVCAST, DL, HalfVT, Srl.getOperand(0));
- SDValue CM = DAG.getNode(AArch64ISD::CMLTz, DL, HalfVT, In);
+ SDValue Zero = DAG.getConstant(0, DL, In.getValueType());
+ SDValue CM = DAG.getSetCC(DL, HalfVT, Zero, In, ISD::SETGT);
return DAG.getNode(AArch64ISD::NVCAST, DL, VT, CM);
}
@@ -25268,6 +25216,14 @@ static SDValue performSETCCCombine(SDNode *N,
if (SDValue V = performOrXorChainCombine(N, DAG))
return V;
+ EVT CmpVT = LHS.getValueType();
+
+ APInt SplatLHSVal;
+ if (CmpVT.isInteger() && Cond == ISD::SETGT &&
+ ISD::isConstantSplatVector(LHS.getNode(), SplatLHSVal) &&
+ SplatLHSVal.isOne())
+ return DAG.getSetCC(DL, VT, DAG.getConstant(0, DL, CmpVT), RHS, ISD::SETGE);
+
return SDValue();
}
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index bc0c3a832bb28..ba275e18fa126 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -241,21 +241,11 @@ enum NodeType : unsigned {
VSRI,
// Vector comparisons
- CMEQ,
- CMGE,
- CMGT,
- CMHI,
- CMHS,
FCMEQ,
FCMGE,
FCMGT,
// Vector zero comparisons
- CMEQz,
- CMGEz,
- CMGTz,
- CMLEz,
- CMLTz,
FCMEQz,
FCMGEz,
FCMGTz,
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 255cd0ec5840c..6d8b84ea4239c 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -7086,7 +7086,7 @@ multiclass SIMD_FP8_CVTL<bits<2>sz, string asm, ValueType dty, SDPatternOperator
class BaseSIMDCmpTwoVector<bit Q, bit U, bits<2> size, bits<2> size2,
bits<5> opcode, RegisterOperand regtype, string asm,
string kind, string zero, ValueType dty,
- ValueType sty, SDNode OpNode>
+ ValueType sty, SDPatternOperator OpNode>
: I<(outs regtype:$Rd), (ins regtype:$Rn), asm,
"{\t$Rd" # kind # ", $Rn" # kind # ", #" # zero #
"|" # kind # "\t$Rd, $Rn, #" # zero # "}", "",
@@ -7110,7 +7110,7 @@ class BaseSIMDCmpTwoVector<bit Q, bit U, bits<2> size, bits<2> size2,
// Comparisons support all element sizes, except 1xD.
multiclass SIMDCmpTwoVector<bit U, bits<5> opc, string asm,
- SDNode OpNode> {
+ SDPatternOperator OpNode> {
def v8i8rz : BaseSIMDCmpTwoVector<0, U, 0b00, 0b00, opc, V64,
asm, ".8b", "0",
v8i8, v8i8, OpNode>;
@@ -7981,7 +7981,7 @@ multiclass SIMDCmpTwoScalarD<bit U, bits<5> opc, string asm,
SDPatternOperator OpNode> {
def v1i64rz : BaseSIMDCmpTwoScalar<U, 0b11, 0b00, opc, FPR64, asm, "0">;
- def : Pat<(v1i64 (OpNode FPR64:$Rn)),
+ def : Pat<(v1i64 (OpNode v1i64:$Rn)),
(!cast<Instruction>(NAME # v1i64rz) FPR64:$Rn)>;
}
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 6c61e3a613f6f..a676e07f23dc3 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -846,23 +846,35 @@ def AArch64vsri : SDNode<"AArch64ISD::VSRI", SDT_AArch64vshiftinsert>;
def AArch64bsp: SDNode<"AArch64ISD::BSP", SDT_AArch64trivec>;
-def AArch64cmeq: SDNode<"AArch64ISD::CMEQ", SDT_AArch64binvec>;
-def AArch64cmge: SDNode<"AArch64ISD::CMGE", SDT_AArch64binvec>;
-def AArch64cmgt: SDNode<"AArch64ISD::CMGT", SDT_AArch64binvec>;
-def AArch64cmhi: SDNode<"AArch64ISD::CMHI", SDT_AArch64binvec>;
-def AArch64cmhs: SDNode<"AArch64ISD::CMHS", SDT_AArch64binvec>;
+def AArch64cmeq : PatFrag<(ops node:$lhs, node:$rhs),
+ (setcc node:$lhs, node:$rhs, SETEQ)>;
+def AArch64cmge : PatFrag<(ops node:$lhs, node:$rhs),
+ (setcc node:$lhs, node:$rhs, SETGE)>;
+def AArch64cmgt : PatFrag<(ops node:$lhs, node:$rhs),
+ (setcc node:$lhs, node:$rhs, SETGT)>;
+def AArch64cmhi : PatFrag<(ops node:$lhs, node:$rhs),
+ (setcc node:$lhs, node:$rhs, SETUGT)>;
+def AArch64cmhs : PatFrag<(ops node:$lhs, node:$rhs),
+ (setcc node:$lhs, node:$rhs, SETUGE)>;
def AArch64fcmeq: SDNode<"AArch64ISD::FCMEQ", SDT_AArch64fcmp>;
def AArch64fcmge: SDNode<"AArch64ISD::FCMGE", SDT_AArch64fcmp>;
def AArch64fcmgt: SDNode<"AArch64ISD::FCMGT", SDT_AArch64fcmp>;
-def AArch64cmeqz: SDNode<"AArch64ISD::CMEQz", SDT_AArch64unvec>;
-def AArch64cmgez: SDNode<"AArch64ISD::CMGEz", SDT_AArch64unvec>;
-def AArch64cmgtz: SDNode<"AArch64ISD::CMGTz", SDT_AArch64unvec>;
-def AArch64cmlez: SDNode<"AArch64ISD::CMLEz", SDT_AArch64unvec>;
-def AArch64cmltz: SDNode<"AArch64ISD::CMLTz", SDT_AArch64unvec>;
+def AArch64cmeqz : PatFrag<(ops node:$lhs),
+ (setcc node:$lhs, immAllZerosV, SETEQ)>;
+def AArch64cmgez : PatFrags<(ops node:$lhs),
+ [(setcc node:$lhs, immAllZerosV, SETGE),
+ (setcc node:$lhs, immAllOnesV, SETGT)]>;
+def AArch64cmgtz : PatFrag<(ops node:$lhs),
+ (setcc node:$lhs, immAllZerosV, SETGT)>;
+def AArch64cmlez : PatFrag<(ops node:$lhs),
+ (setcc immAllZerosV, node:$lhs, SETGE)>;
+def AArch64cmltz : PatFrag<(ops node:$lhs),
+ (setcc immAllZerosV, node:$lhs, SETGT)>;
+
def AArch64cmtst : PatFrag<(ops node:$LHS, node:$RHS),
- (vnot (AArch64cmeqz (and node:$LHS, node:$RHS)))>;
+ (vnot (AArch64cmeqz (and node:$LHS, node:$RHS)))>;
def AArch64fcmeqz: SDNode<"AArch64ISD::FCMEQz", SDT_AArch64fcmpz>;
def AArch64fcmgez: SDNode<"AArch64ISD::FCMGEz", SDT_AArch64fcmpz>;
@@ -5671,7 +5683,7 @@ defm CMHI : SIMDThreeSameVector<1, 0b00110, "cmhi", AArch64cmhi>;
defm CMHS : SIMDThreeSameVector<1, 0b00111, "cmhs", AArch64cmhs>;
defm CMTST : SIMDThreeSameVector<0, 0b10001, "cmtst", AArch64cmtst>;
foreach VT = [ v8i8, v16i8, v4i16, v8i16, v2i32, v4i32, v2i64 ] in {
-def : Pat<(vnot (AArch64cmeqz VT:$Rn)), (!cast<Instruction>("CMTST"#VT) VT:$Rn, VT:$Rn)>;
+def : Pat<(VT (vnot (AArch64cmeqz VT:$Rn))), (!cast<Instruction>("CMTST"#VT) VT:$Rn, VT:$Rn)>;
}
defm FABD : SIMDThreeSameVectorFP<1,1,0b010,"fabd", int_aarch64_neon_fabd>;
let Predicates = [HasNEON] in {
diff --git a/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll b/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll
index f0c9dccb21d84..c7a423f2e4f8d 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll
@@ -352,17 +352,16 @@ define void @typei1_orig(i64 %a, ptr %p, ptr %q) {
;
; CHECK-GI-LABEL: typei1_orig:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: ldr q1, [x2]
+; CHECK-GI-NEXT: ldr q0, [x2]
; CHECK-GI-NEXT: cmp x0, #0
-; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff
; CHECK-GI-NEXT: cset w8, gt
-; CHECK-GI-NEXT: neg v1.8h, v1.8h
-; CHECK-GI-NEXT: dup v2.8h, w8
-; CHECK-GI-NEXT: mvn v0.16b, v0.16b
-; CHECK-GI-NEXT: mul v1.8h, v1.8h, v2.8h
-; CHECK-GI-NEXT: cmeq v1.8h, v1.8h, #0
+; CHECK-GI-NEXT: neg v0.8h, v0.8h
+; CHECK-GI-NEXT: dup v1.8h, w8
+; CHECK-GI-NEXT: mul v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: movi v1.2d, #0xffffffffffffffff
+; CHECK-GI-NEXT: cmtst v0.8h, v0.8h, v0.8h
; CHECK-GI-NEXT: mvn v1.16b, v1.16b
-; CHECK-GI-NEXT: uzp1 v0.16b, v1.16b, v0.16b
+; CHECK-GI-NEXT: uzp1 v0.16b, v0.16b, v1.16b
; CHECK-GI-NEXT: shl v0.16b, v0.16b, #7
; CHECK-GI-NEXT: sshr v0.16b, v0.16b, #7
; CHECK-GI-NEXT: str q0, [x1]
diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
index 81770a4ebdd4d..c834ca772b6ac 100644
--- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
@@ -2382,11 +2382,11 @@ define <2 x i1> @test_signed_v2f64_v2i1(<2 x double> %f) {
; CHECK-GI-LABEL: test_signed_v2f64_v2i1:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: fcvtzs v0.2d, v0.2d
-; CHECK-GI-NEXT: movi v2.2d, #0xffffffffffffffff
; CHECK-GI-NEXT: cmlt v1.2d, v0.2d, #0
; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b
-; CHECK-GI-NEXT: cmgt v1.2d, v0.2d, v2.2d
-; CHECK-GI-NEXT: bif v0.16b, v2.16b, v1.16b
+; CHECK-GI-NEXT: movi v1.2d, #0xffffffffffffffff
+; CHECK-GI-NEXT: cmge v2.2d, v0.2d, #0
+; CHECK-GI-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-GI-NEXT: xtn v0.2s, v0.2d
; CHECK-GI-NEXT: ret
%x = call <2 x i1> @llvm.fptosi.sat.v2f64.v2i1(<2 x double> %f)
diff --git a/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll b/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
index f6dbf5251fc27..fb65a748c865f 100644
--- a/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
@@ -1499,8 +1499,7 @@ define <8 x i8> @vselect_cmpz_ne(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) {
;
; CHECK-GI-LABEL: vselect_cmpz_ne:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: cmeq v0.8b, v0.8b, #0
-; CHECK-GI-NEXT: mvn v0.8b, v0.8b
+; CHECK-GI-NEXT: cmtst v0.8b, v0.8b, v0.8b
; CHECK-GI-NEXT: bsl v0.8b, v1.8b, v2.8b
; CHECK-GI-NEXT: ret
%cmp = icmp ne <8 x i8> %a, zeroinitializer
@@ -1533,17 +1532,10 @@ define <8 x i8> @vselect_tst(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) {
}
define <8 x i8> @sext_tst(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) {
-; CHECK-SD-LABEL: sext_tst:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: cmtst v0.8b, v0.8b, v1.8b
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: sext_tst:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-GI-NEXT: cmeq v0.8b, v0.8b, #0
-; CHECK-GI-NEXT: mvn v0.8b, v0.8b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: sext_tst:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmtst v0.8b, v0.8b, v1.8b
+; CHECK-NEXT: ret
%tmp3 = and <8 x i8> %a, %b
%tmp4 = icmp ne <8 x i8> %tmp3, zeroinitializer
%d = sext <8 x i1> %tmp4 to <8 x i8>
diff --git a/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll b/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
index 8f7d5dd5588b9..2c2cb72112879 100644
--- a/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
@@ -738,17 +738,10 @@ define <2 x i64> @cmls2xi64(<2 x i64> %A, <2 x i64> %B) {
}
define <8 x i8> @cmtst8xi8(<8 x i8> %A, <8 x i8> %B) {
-; CHECK-SD-LABEL: cmtst8xi8:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: cmtst v0.8b, v0.8b, v1.8b
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: cmtst8xi8:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-GI-NEXT: cmeq v0.8b, v0.8b, #0
-; CHECK-GI-NEXT: mvn v0.8b, v0.8b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: cmtst8xi8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmtst v0.8b, v0.8b, v1.8b
+; CHECK-NEXT: ret
%tmp3 = and <8 x i8> %A, %B
%tmp4 = icmp ne <8 x i8> %tmp3, zeroinitializer
%tmp5 = sext <8 x i1> %tmp4 to <8 x i8>
@@ -756,17 +749,10 @@ define <8 x i8> @cmtst8xi8(<8 x i8> %A, <8 x i8> %B) {
}
define <16 x i8> @cmtst16xi8(<16 x i8> %A, <16 x i8> %B) {
-; CHECK-SD-LABEL: cmtst16xi8:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: cmtst v0.16b, v0.16b, v1.16b
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: cmtst16xi8:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b
-; CHECK-GI-NEXT: cmeq v0.16b, v0.16b, #0
-; CHECK-GI-NEXT: mvn v0.16b, v0.16b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: cmtst16xi8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmtst v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: ret
%tmp3 = and <16 x i8> %A, %B
%tmp4 = icmp ne <16 x i8> %tmp3, zeroinitializer
%tmp5 = sext <16 x i1> %tmp4 to <16 x i8>
@@ -774,17 +760,10 @@ define <16 x i8> @cmtst16xi8(<16 x i8> %A, <16 x i8> %B) {
}
define <4 x i16> @cmtst4xi16(<4 x i16> %A, <4 x i16> %B) {
-; CHECK-SD-LABEL: cmtst4xi16:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: cmtst v0.4h, v0.4h, v1.4h
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: cmtst4xi16:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-GI-NEXT: cmeq v0.4h, v0.4h, #0
-; CHECK-GI-NEXT: mvn v0.8b, v0.8b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: cmtst4xi16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmtst v0.4h, v0.4h, v1.4h
+; CHECK-NEXT: ret
%tmp3 = and <4 x i16> %A, %B
%tmp4 = icmp ne <4 x i16> %tmp3, zeroinitializer
%tmp5 = sext <4 x i1> %tmp4 to <4 x i16>
@@ -792,17 +771,10 @@ define <4 x i16> @cmtst4xi16(<4 x i16> %A, <4 x i16> %B) {
}
define <8 x i16> @cmtst8xi16(<8 x i16> %A, <8 x i16> %B) {
-; CHECK-SD-LABEL: cmtst8xi16:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: cmtst v0.8h, v0.8h, v1.8h
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: cmtst8xi16:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b
-; CHECK-GI-NEXT: cmeq v0.8h, v0.8h, #0
-; CHECK-GI-NEXT: mvn v0.16b, v0.16b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: cmtst8xi16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmtst v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: ret
%tmp3 = and <8 x i16> %A, %B
%tmp4 = icmp ne <8 x i16> %tmp3, zeroinitializer
%tmp5 = sext <8 x i1> %tmp4 to <8 x i16>
@@ -810,17 +782,10 @@ define <8 x i16> @cmtst8xi16(<8 x i16> %A, <8 x i16> %B) {
}
define <2 x i32> @cmtst2xi32(<2 x i32> %A, <2 x i32> %B) {
-; CHECK-SD-LABEL: cmtst2xi32:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: cmtst v0.2s, v0.2s, v1.2s
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: cmtst2xi32:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-GI-NEXT: cmeq v0.2s, v0.2s, #0
-; CHECK-GI-NEXT: mvn v0.8b, v0.8b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: cmtst2xi32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmtst v0.2s, v0.2s, v1.2s
+; CHECK-NEXT: ret
%tmp3 = and <2 x i32> %A, %B
%tmp4 = icmp ne <2 x i32> %tmp3, zeroinitializer
%tmp5 = sext <2 x i1> %tmp4 to <2 x i32>
@@ -828,17 +793,10 @@ define <2 x i32> @cmtst2xi32(<2 x i32> %A, <2 x i32> %B) {
}
define <4 x i32> @cmtst4xi32(<4 x i32> %A, <4 x i32> %B) {
-; CHECK-SD-LABEL: cmtst4xi32:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: cmtst v0.4s, v0.4s, v1.4s
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: cmtst4xi32:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b
-; CHECK-GI-NEXT: cmeq v0.4s, v0.4s, #0
-; CHECK-GI-NEXT: mvn v0.16b, v0.16b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: cmtst4xi32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmtst v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: ret
%tmp3 = and <4 x i32> %A, %B
%tmp4 = icmp ne <4 x i32> %tmp3, zeroinitializer
%tmp5 = sext <4 x i1> %tmp4 to <4 x i32>
@@ -846,17 +804,10 @@ define <4 x i32> @cmtst4xi32(<4 x i32> %A, <4 x i32> %B) {
}
define <2 x i64> @cmtst2xi64(<2 x i64> %A, <2 x i64> %B) {
-; CHECK-SD-LABEL: cmtst2xi64:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: cmtst v0.2d, v0.2d, v1.2d
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: cmtst2xi64:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b
-; CHECK-GI-NEXT: cmeq v0.2d, v0.2d, #0
-; CHECK-GI-NEXT: mvn v0.16b, v0.16b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: cmtst2xi64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmtst v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: ret
%tmp3 = and <2 x i64> %A, %B
%tmp4 = icmp ne <2 x i64> %tmp3, zeroinitializer
%tmp5 = sext <2 x i1> %tmp4 to <2 x i64>
@@ -1120,112 +1071,70 @@ define <2 x i64> @cmgez2xi64_alt(<2 x i64> %A) {
}
define <8 x i8> @cmgez8xi8_alt2(<8 x i8> %A) {
-; CHECK-SD-LABEL: cmgez8xi8_alt2:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: cmge v0.8b, v0.8b, #0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: cmgez8xi8_alt2:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: movi d1, #0xffffffffffffffff
-; CHECK-GI-NEXT: cmgt v0.8b, v0.8b, v1.8b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: cmgez8xi8_alt2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmge v0.8b, v0.8b, #0
+; CHECK-NEXT: ret
%tmp3 = icmp sgt <8 x i8> %A, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
%tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
ret <8 x i8> %tmp4
}
define <16 x i8> @cmgez16xi8_alt2(<16 x i8> %A) {
-; CHECK-SD-LABEL: cmgez16xi8_alt2:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: cmge v0.16b, v0.16b, #0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: cmgez16xi8_alt2:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: movi v1.2d, #0xffffffffffffffff
-; CHECK-GI-NEXT: cmgt v0.16b, v0.16b, v1.16b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: cmgez16xi8_alt2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmge v0.16b, v0.16b, #0
+; CHECK-NEXT: ret
%tmp3 = icmp sgt <16 x i8> %A, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
%tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
ret <16 x i8> %tmp4
}
define <4 x i16> @cmgez4xi16_alt2(<4 x i16> %A) {
-; CHECK-SD-LABEL: cmgez4xi16_alt2:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: cmge v0.4h, v0.4h, #0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: cmgez4xi16_alt2:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: movi d1, #0xffffffffffffffff
-; CHECK-GI-NEXT: cmgt v0.4h, v0.4h, v1.4h
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: cmgez4xi16_alt2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmge v0.4h, v0.4h, #0
+; CHECK-NEXT: ret
%tmp3 = icmp sgt <4 x i16> %A, <i16 -1, i16 -1, i16 -1, i16 -1>
%tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
ret <4 x i16> %tmp4
}
define <8 x i16> @cmgez8xi16_alt2(<8 x i16> %A) {
-; CHECK-SD-LABEL: cmgez8xi16_alt2:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: cmge v0.8h, v0.8h, #0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: cmgez8xi16_alt2:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: movi v1.2d, #0xffffffffffffffff
-; CHECK-GI-NEXT: cmgt v0.8h, v0.8h, v1.8h
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: cmgez8xi16_alt2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmge v0.8h, v0.8h, #0
+; CHECK-NEXT: ret
%tmp3 = icmp sgt <8 x i16> %A, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
%tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
ret <8 x i16> %tmp4
}
define <2 x i32> @cmgez2xi32_alt2(<2 x i32> %A) {
-; CHECK-SD-LABEL: cmgez2xi32_alt2:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: cmge v0.2s, v0.2s, #0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: cmgez2xi32_alt2:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: movi d1, #0xffffffffffffffff
-; CHECK-GI-NEXT: cmgt v0.2s, v0.2s, v1.2s
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: cmgez2xi32_alt2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmge v0.2s, v0.2s, #0
+; CHECK-NEXT: ret
%tmp3 = icmp sgt <2 x i32> %A, <i32 -1, i32 -1>
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
}
define <4 x i32> @cmgez4xi32_alt2(<4 x i32> %A) {
-; CHECK-SD-LABEL: cmgez4xi32_alt2:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: cmge v0.4s, v0.4s, #0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: cmgez4xi32_alt2:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: movi v1.2d, #0xffffffffffffffff
-; CHECK-GI-NEXT: cmgt v0.4s, v0.4s, v1.4s
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: cmgez4xi32_alt2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmge v0.4s, v0.4s, #0
+; CHECK-NEXT: ret
%tmp3 = icmp sgt <4 x i32> %A, <i32 -1, i32 -1, i32 -1, i32 -1>
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
}
define <2 x i64> @cmgez2xi64_alt2(<2 x i64> %A) {
-; CHECK-SD-LABEL: cmgez2xi64_alt2:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: cmge v0.2d, v0.2d, #0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: cmgez2xi64_alt2:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: movi v1.2d, #0xffffffffffffffff
-; CHECK-GI-NEXT: cmgt v0.2d, v0.2d, v1.2d
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: cmgez2xi64_alt2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmge v0.2d, v0.2d, #0
+; CHECK-NEXT: ret
%tmp3 = icmp sgt <2 x i64> %A, <i64 -1, i64 -1>
%tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
ret <2 x i64> %tmp4
@@ -1692,112 +1601,70 @@ define <2 x i64> @cmltz2xi64_alt(<2 x i64> %A) {
}
define <8 x i8> @cmneqz8xi8(<8 x i8> %A) {
-; CHECK-SD-LABEL: cmneqz8xi8:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: cmtst v0.8b, v0.8b, v0.8b
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: cmneqz8xi8:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: cmeq v0.8b, v0.8b, #0
-; CHECK-GI-NEXT: mvn v0.8b, v0.8b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: cmneqz8xi8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmtst v0.8b, v0.8b, v0.8b
+; CHECK-NEXT: ret
%tmp3 = icmp ne <8 x i8> %A, zeroinitializer
%tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
ret <8 x i8> %tmp4
}
define <16 x i8> @cmneqz16xi8(<16 x i8> %A) {
-; CHECK-SD-LABEL: cmneqz16xi8:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: cmtst v0.16b, v0.16b, v0.16b
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: cmneqz16xi8:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: cmeq v0.16b, v0.16b, #0
-; CHECK-GI-NEXT: mvn v0.16b, v0.16b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: cmneqz16xi8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmtst v0.16b, v0.16b, v0.16b
+; CHECK-NEXT: ret
%tmp3 = icmp ne <16 x i8> %A, zeroinitializer
%tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
ret <16 x i8> %tmp4
}
define <4 x i16> @cmneqz4xi16(<4 x i16> %A) {
-; CHECK-SD-LABEL: cmneqz4xi16:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: cmtst v0.4h, v0.4h, v0.4h
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: cmneqz4xi16:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: cmeq v0.4h, v0.4h, #0
-; CHECK-GI-NEXT: mvn v0.8b, v0.8b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: cmneqz4xi16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmtst v0.4h, v0.4h, v0.4h
+; CHECK-NEXT: ret
%tmp3 = icmp ne <4 x i16> %A, zeroinitializer
%tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
ret <4 x i16> %tmp4
}
define <8 x i16> @cmneqz8xi16(<8 x i16> %A) {
-; CHECK-SD-LABEL: cmneqz8xi16:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: cmtst v0.8h, v0.8h, v0.8h
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: cmneqz8xi16:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: cmeq v0.8h, v0.8h, #0
-; CHECK-GI-NEXT: mvn v0.16b, v0.16b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: cmneqz8xi16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmtst v0.8h, v0.8h, v0.8h
+; CHECK-NEXT: ret
%tmp3 = icmp ne <8 x i16> %A, zeroinitializer
%tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
ret <8 x i16> %tmp4
}
define <2 x i32> @cmneqz2xi32(<2 x i32> %A) {
-; CHECK-SD-LABEL: cmneqz2xi32:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: cmtst v0.2s, v0.2s, v0.2s
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: cmneqz2xi32:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: cmeq v0.2s, v0.2s, #0
-; CHECK-GI-NEXT: mvn v0.8b, v0.8b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: cmneqz2xi32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmtst v0.2s, v0.2s, v0.2s
+; CHECK-NEXT: ret
%tmp3 = icmp ne <2 x i32> %A, zeroinitializer
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
}
define <4 x i32> @cmneqz4xi32(<4 x i32> %A) {
-; CHECK-SD-LABEL: cmneqz4xi32:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: cmtst v0.4s, v0.4s, v0.4s
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: cmneqz4xi32:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: cmeq v0.4s, v0.4s, #0
-; CHECK-GI-NEXT: mvn v0.16b, v0.16b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: cmneqz4xi32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmtst v0.4s, v0.4s, v0.4s
+; CHECK-NEXT: ret
%tmp3 = icmp ne <4 x i32> %A, zeroinitializer
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
}
define <2 x i64> @cmneqz2xi64(<2 x i64> %A) {
-; CHECK-SD-LABEL: cmneqz2xi64:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: cmtst v0.2d, v0.2d, v0.2d
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: cmneqz2xi64:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: cmeq v0.2d, v0.2d, #0
-; CHECK-GI-NEXT: mvn v0.16b, v0.16b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: cmneqz2xi64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmtst v0.2d, v0.2d, v0.2d
+; CHECK-NEXT: ret
%tmp3 = icmp ne <2 x i64> %A, zeroinitializer
%tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
ret <2 x i64> %tmp4
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-extract-subvector.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-extract-subvector.ll
index 8fac0e1067684..8e519806718f7 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-extract-subvector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-extract-subvector.ll
@@ -549,13 +549,12 @@ define void @extract_subvector_legalization_v8i32() vscale_range(2,2) #0 {
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: adrp x8, .LCPI40_0
; CHECK-NEXT: add x8, x8, :lo12:.LCPI40_0
-; CHECK-NEXT: movi v2.2d, #0000000000000000
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8]
; CHECK-NEXT: mov z1.d, z0.d
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16
-; CHECK-NEXT: cmeq v0.4s, v0.4s, v2.4s
-; CHECK-NEXT: cmeq v1.4s, v1.4s, v2.4s
+; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
+; CHECK-NEXT: cmeq v1.4s, v1.4s, #0
; CHECK-NEXT: sunpklo z0.d, z0.s
; CHECK-NEXT: sunpklo z1.d, z1.s
; CHECK-NEXT: cmpne p0.d, p1/z, z1.d, #0
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll
index 5516a4716d59d..a50d0dc37eaf6 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll
@@ -309,8 +309,8 @@ define void @masked_gather_v2i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ptrue p0.d, vl2
-; CHECK-NEXT: cmeq v0.2s, v0.2s, #0
-; CHECK-NEXT: sshll v0.2d, v0.2s, #0
+; CHECK-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-NEXT: cmeq v0.2d, v0.2d, #0
; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
; CHECK-NEXT: ldr q0, [x1]
; CHECK-NEXT: ld1w { z0.d }, p0/z, [z0.d]
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll
index e3e06dcdf17f3..a42fce70f4f15 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll
@@ -295,9 +295,8 @@ define void @masked_scatter_v2i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ptrue p0.d, vl2
-; CHECK-NEXT: cmeq v1.2s, v0.2s, #0
; CHECK-NEXT: ushll v0.2d, v0.2s, #0
-; CHECK-NEXT: sshll v1.2d, v1.2s, #0
+; CHECK-NEXT: cmeq v1.2d, v0.2d, #0
; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, #0
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: st1w { z0.d }, p0, [z1.d]
diff --git a/llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll b/llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll
index b0a30b7150637..4e2ca082e28b5 100644
--- a/llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll
+++ b/llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll
@@ -30,8 +30,7 @@ define i16 @convert_to_bitmask16(<16 x i8> %vec) {
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: sub sp, sp, #16
; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
-; CHECK-GI-NEXT: cmeq.16b v0, v0, #0
-; CHECK-GI-NEXT: mvn.16b v0, v0
+; CHECK-GI-NEXT: cmtst.16b v0, v0, v0
; CHECK-GI-NEXT: umov.b w8, v0[1]
; CHECK-GI-NEXT: umov.b w9, v0[0]
; CHECK-GI-NEXT: umov.b w10, v0[2]
@@ -106,8 +105,7 @@ define i16 @convert_to_bitmask8(<8 x i16> %vec) {
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: sub sp, sp, #16
; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
-; CHECK-GI-NEXT: cmeq.8h v0, v0, #0
-; CHECK-GI-NEXT: mvn.16b v0, v0
+; CHECK-GI-NEXT: cmtst.8h v0, v0, v0
; CHECK-GI-NEXT: xtn.8b v0, v0
; CHECK-GI-NEXT: umov.b w8, v0[1]
; CHECK-GI-NEXT: umov.b w9, v0[0]
@@ -158,8 +156,7 @@ define i4 @convert_to_bitmask4(<4 x i32> %vec) {
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: sub sp, sp, #16
; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
-; CHECK-GI-NEXT: cmeq.4s v0, v0, #0
-; CHECK-GI-NEXT: mvn.16b v0, v0
+; CHECK-GI-NEXT: cmtst.4s v0, v0, v0
; CHECK-GI-NEXT: mov.s w8, v0[1]
; CHECK-GI-NEXT: mov.s w9, v0[2]
; CHECK-GI-NEXT: fmov w11, s0
@@ -709,10 +706,8 @@ define i8 @convert_large_vector(<8 x i32> %vec) {
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: sub sp, sp, #16
; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
-; CHECK-GI-NEXT: cmeq.4s v0, v0, #0
-; CHECK-GI-NEXT: cmeq.4s v1, v1, #0
-; CHECK-GI-NEXT: mvn.16b v0, v0
-; CHECK-GI-NEXT: mvn.16b v1, v1
+; CHECK-GI-NEXT: cmtst.4s v0, v0, v0
+; CHECK-GI-NEXT: cmtst.4s v1, v1, v1
; CHECK-GI-NEXT: uzp1.8h v0, v0, v1
; CHECK-GI-NEXT: xtn.8b v0, v0
; CHECK-GI-NEXT: umov.b w8, v0[1]
@@ -766,9 +761,7 @@ define i4 @convert_legalized_illegal_element_size(<4 x i22> %vec) {
; CHECK-GI-NEXT: sub sp, sp, #16
; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
; CHECK-GI-NEXT: movi.4s v1, #63, msl #16
-; CHECK-GI-NEXT: and.16b v0, v0, v1
-; CHECK-GI-NEXT: cmeq.4s v0, v0, #0
-; CHECK-GI-NEXT: mvn.16b v0, v0
+; CHECK-GI-NEXT: cmtst.4s v0, v0, v1
; CHECK-GI-NEXT: mov.s w8, v0[1]
; CHECK-GI-NEXT: mov.s w9, v0[2]
; CHECK-GI-NEXT: fmov w11, s0
@@ -819,18 +812,11 @@ define i8 @no_direct_convert_for_bad_concat(<4 x i32> %vec) {
}
define <8 x i1> @no_convert_without_direct_bitcast(<8 x i16> %vec) {
-; CHECK-SD-LABEL: no_convert_without_direct_bitcast:
-; CHECK-SD: ; %bb.0:
-; CHECK-SD-NEXT: cmtst.8h v0, v0, v0
-; CHECK-SD-NEXT: xtn.8b v0, v0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: no_convert_without_direct_bitcast:
-; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: cmeq.8h v0, v0, #0
-; CHECK-GI-NEXT: mvn.16b v0, v0
-; CHECK-GI-NEXT: xtn.8b v0, v0
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: no_convert_without_direct_bitcast:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: cmtst.8h v0, v0, v0
+; CHECK-NEXT: xtn.8b v0, v0
+; CHECK-NEXT: ret
%cmp_result = icmp ne <8 x i16> %vec, zeroinitializer
ret <8 x i1> %cmp_result
@@ -882,8 +868,7 @@ define i6 @no_combine_illegal_num_elements(<6 x i32> %vec) {
; CHECK-GI-NEXT: cmeq.4s v1, v1, v2
; CHECK-GI-NEXT: mvn.16b v1, v1
; CHECK-GI-NEXT: mov.s v0[3], w3
-; CHECK-GI-NEXT: cmeq.4s v0, v0, #0
-; CHECK-GI-NEXT: mvn.16b v0, v0
+; CHECK-GI-NEXT: cmtst.4s v0, v0, v0
; CHECK-GI-NEXT: mov.s w8, v0[1]
; CHECK-GI-NEXT: mov.s w9, v0[2]
; CHECK-GI-NEXT: mov.s w10, v0[3]
More information about the llvm-commits
mailing list