[llvm] [AArch64] Expand scmp/ucmp vector operations with sub (PR #108830)
David Green via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 16 07:11:10 PDT 2024
https://github.com/davemgreen created https://github.com/llvm/llvm-project/pull/108830
Unlike scalar, where AArch64 prefers expanding scmp/ucmp with select, under Neon we can use the arithmetic expansion to generate fewer instructions. Notably it also prevents the scalarization of vselect during vector-legalization.
From e016b0b914fa09a1ce74e3e3863864aab06a4ff2 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Mon, 16 Sep 2024 14:33:08 +0100
Subject: [PATCH] [AArch64] Expand scmp/ucmp vector operations with sub
Unlike scalar, where AArch64 prefers expanding scmp/ucmp with select, under
Neon we can use the arithmetic expansion to generate fewer instructions.
Notably it also prevents the scalarization of vselect during
vector-legalization.
---
llvm/include/llvm/CodeGen/BasicTTIImpl.h | 3 +-
llvm/include/llvm/CodeGen/TargetLowering.h | 2 +-
.../CodeGen/SelectionDAG/TargetLowering.cpp | 2 +-
.../Target/AArch64/AArch64ISelLowering.cpp | 6 +
llvm/lib/Target/AArch64/AArch64ISelLowering.h | 2 +-
llvm/lib/Target/SystemZ/SystemZISelLowering.h | 2 +-
llvm/test/Analysis/CostModel/AArch64/cmp.ll | 24 +--
llvm/test/CodeGen/AArch64/scmp.ll | 190 +++---------------
llvm/test/CodeGen/AArch64/ucmp.ll | 190 +++---------------
9 files changed, 80 insertions(+), 341 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 50dc7d5c54c54a..caa3a57ebabc2e 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -2451,7 +2451,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
CmpIntrinsic::getLTPredicate(IID),
CostKind);
- if (TLI->shouldExpandCmpUsingSelects()) {
+ EVT VT = TLI->getValueType(DL, CmpTy, true);
+ if (TLI->shouldExpandCmpUsingSelects(VT)) {
// x < y ? -1 : (x > y ? 1 : 0)
Cost += 2 * thisT()->getCmpSelInstrCost(
BinaryOperator::Select, RetTy, CondTy,
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index e17d68d2690c86..802510dd0e4fa0 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -3409,7 +3409,7 @@ class TargetLoweringBase {
/// Should we expand [US]CMP nodes using two selects and two compares, or by
/// doing arithmetic on boolean types
- virtual bool shouldExpandCmpUsingSelects() const { return false; }
+ virtual bool shouldExpandCmpUsingSelects(EVT VT) const { return false; }
/// Does this target support complex deinterleaving
virtual bool isComplexDeinterleavingSupported() const { return false; }
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index ca379a691da918..95937886280685 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -10681,7 +10681,7 @@ SDValue TargetLowering::expandCMP(SDNode *Node, SelectionDAG &DAG) const {
// because one of the conditions can be merged with one of the selects.
// And finally, if we don't know the contents of high bits of a boolean value
// we can't perform any arithmetic either.
- if (shouldExpandCmpUsingSelects() || BoolVT.getScalarSizeInBits() == 1 ||
+ if (shouldExpandCmpUsingSelects(VT) || BoolVT.getScalarSizeInBits() == 1 ||
getBooleanContents(BoolVT) == UndefinedBooleanContent) {
SDValue SelectZeroOrOne =
DAG.getSelect(dl, ResVT, IsGT, DAG.getConstant(1, dl, ResVT),
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 47da9d577cd827..d41f45ac0ce823 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -27781,6 +27781,12 @@ bool AArch64TargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT,
return TargetLowering::shouldConvertFpToSat(Op, FPVT, VT);
}
+bool AArch64TargetLowering::shouldExpandCmpUsingSelects(EVT VT) const {
+ // Expand scalar and SVE operations using selects. Neon vectors prefer sub to
+ // avoid vselect becoming bsl / unrolling.
+ return !VT.isFixedLengthVector();
+}
+
MachineInstr *
AArch64TargetLowering::EmitKCFICheck(MachineBasicBlock &MBB,
MachineBasicBlock::instr_iterator &MBBI,
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index f9d45b02d30e30..06b918f9ccaa28 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -914,7 +914,7 @@ class AArch64TargetLowering : public TargetLowering {
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override;
- bool shouldExpandCmpUsingSelects() const override { return true; }
+ bool shouldExpandCmpUsingSelects(EVT VT) const override;
bool isComplexDeinterleavingSupported() const override;
bool isComplexDeinterleavingOperationSupported(
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
index 1e7285e3e0fc53..4a18bde00a0b98 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -507,7 +507,7 @@ class SystemZTargetLowering : public TargetLowering {
bool shouldConsiderGEPOffsetSplit() const override { return true; }
- bool shouldExpandCmpUsingSelects() const override { return true; }
+ bool shouldExpandCmpUsingSelects(EVT VT) const override { return true; }
const char *getTargetNodeName(unsigned Opcode) const override;
std::pair<unsigned, const TargetRegisterClass *>
diff --git a/llvm/test/Analysis/CostModel/AArch64/cmp.ll b/llvm/test/Analysis/CostModel/AArch64/cmp.ll
index 1b4b5eb616b5a9..a56ca8890e307b 100644
--- a/llvm/test/Analysis/CostModel/AArch64/cmp.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/cmp.ll
@@ -128,16 +128,16 @@ define void @uscmp() {
; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %u16 = call i16 @llvm.ucmp.i16.i16(i16 undef, i16 undef)
; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %u32 = call i32 @llvm.ucmp.i32.i32(i32 undef, i32 undef)
; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %u64 = call i64 @llvm.ucmp.i64.i64(i64 undef, i64 undef)
-; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %uv16i8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> undef, <16 x i8> undef)
-; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %uv8i16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> undef, <8 x i16> undef)
-; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %uv4i32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> undef, <4 x i32> undef)
+; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %uv16i8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> undef, <16 x i8> undef)
+; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %uv8i16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> undef, <8 x i16> undef)
+; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %uv4i32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> undef, <4 x i32> undef)
; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s8 = call i8 @llvm.scmp.i8.i8(i8 undef, i8 undef)
; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s16 = call i16 @llvm.scmp.i16.i16(i16 undef, i16 undef)
; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s32 = call i32 @llvm.scmp.i32.i32(i32 undef, i32 undef)
; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s64 = call i64 @llvm.scmp.i64.i64(i64 undef, i64 undef)
-; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %sv16i8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> undef, <16 x i8> undef)
-; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %sv8i16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> undef, <8 x i16> undef)
-; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %sv4i32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> undef, <4 x i32> undef)
+; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %sv16i8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> undef, <16 x i8> undef)
+; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %sv8i16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> undef, <8 x i16> undef)
+; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %sv4i32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> undef, <4 x i32> undef)
; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-SIZE-LABEL: 'uscmp'
@@ -145,16 +145,16 @@ define void @uscmp() {
; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %u16 = call i16 @llvm.ucmp.i16.i16(i16 undef, i16 undef)
; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %u32 = call i32 @llvm.ucmp.i32.i32(i32 undef, i32 undef)
; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %u64 = call i64 @llvm.ucmp.i64.i64(i64 undef, i64 undef)
-; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %uv16i8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> undef, <16 x i8> undef)
-; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %uv8i16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> undef, <8 x i16> undef)
-; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %uv4i32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> undef, <4 x i32> undef)
+; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %uv16i8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> undef, <16 x i8> undef)
+; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %uv8i16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> undef, <8 x i16> undef)
+; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %uv4i32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> undef, <4 x i32> undef)
; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s8 = call i8 @llvm.scmp.i8.i8(i8 undef, i8 undef)
; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s16 = call i16 @llvm.scmp.i16.i16(i16 undef, i16 undef)
; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s32 = call i32 @llvm.scmp.i32.i32(i32 undef, i32 undef)
; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s64 = call i64 @llvm.scmp.i64.i64(i64 undef, i64 undef)
-; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sv16i8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> undef, <16 x i8> undef)
-; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sv8i16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> undef, <8 x i16> undef)
-; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sv4i32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> undef, <4 x i32> undef)
+; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %sv16i8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> undef, <16 x i8> undef)
+; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %sv8i16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> undef, <8 x i16> undef)
+; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %sv4i32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> undef, <4 x i32> undef)
; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%u8 = call i8 @llvm.ucmp(i8 undef, i8 undef)
diff --git a/llvm/test/CodeGen/AArch64/scmp.ll b/llvm/test/CodeGen/AArch64/scmp.ll
index 3d18a904ed2d3f..4aff5a836e1a18 100644
--- a/llvm/test/CodeGen/AArch64/scmp.ll
+++ b/llvm/test/CodeGen/AArch64/scmp.ll
@@ -136,11 +136,9 @@ define i64 @scmp.64.64(i64 %x, i64 %y) nounwind {
define <8 x i8> @s_v8i8(<8 x i8> %a, <8 x i8> %b) {
; CHECK-SD-LABEL: s_v8i8:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: movi v2.8b, #1
-; CHECK-SD-NEXT: cmgt v3.8b, v0.8b, v1.8b
+; CHECK-SD-NEXT: cmgt v2.8b, v0.8b, v1.8b
; CHECK-SD-NEXT: cmgt v0.8b, v1.8b, v0.8b
-; CHECK-SD-NEXT: and v1.8b, v3.8b, v2.8b
-; CHECK-SD-NEXT: orr v0.8b, v1.8b, v0.8b
+; CHECK-SD-NEXT: sub v0.8b, v0.8b, v2.8b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: s_v8i8:
@@ -160,11 +158,9 @@ entry:
define <16 x i8> @s_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-SD-LABEL: s_v16i8:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: movi v2.16b, #1
-; CHECK-SD-NEXT: cmgt v3.16b, v0.16b, v1.16b
+; CHECK-SD-NEXT: cmgt v2.16b, v0.16b, v1.16b
; CHECK-SD-NEXT: cmgt v0.16b, v1.16b, v0.16b
-; CHECK-SD-NEXT: and v1.16b, v3.16b, v2.16b
-; CHECK-SD-NEXT: orr v0.16b, v1.16b, v0.16b
+; CHECK-SD-NEXT: sub v0.16b, v0.16b, v2.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: s_v16i8:
@@ -184,11 +180,9 @@ entry:
define <4 x i16> @s_v4i16(<4 x i16> %a, <4 x i16> %b) {
; CHECK-SD-LABEL: s_v4i16:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: movi v2.4h, #1
-; CHECK-SD-NEXT: cmgt v3.4h, v0.4h, v1.4h
+; CHECK-SD-NEXT: cmgt v2.4h, v0.4h, v1.4h
; CHECK-SD-NEXT: cmgt v0.4h, v1.4h, v0.4h
-; CHECK-SD-NEXT: and v1.8b, v3.8b, v2.8b
-; CHECK-SD-NEXT: orr v0.8b, v1.8b, v0.8b
+; CHECK-SD-NEXT: sub v0.4h, v0.4h, v2.4h
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: s_v4i16:
@@ -208,11 +202,9 @@ entry:
define <8 x i16> @s_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-SD-LABEL: s_v8i16:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: movi v2.8h, #1
-; CHECK-SD-NEXT: cmgt v3.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: cmgt v2.8h, v0.8h, v1.8h
; CHECK-SD-NEXT: cmgt v0.8h, v1.8h, v0.8h
-; CHECK-SD-NEXT: and v1.16b, v3.16b, v2.16b
-; CHECK-SD-NEXT: orr v0.16b, v1.16b, v0.16b
+; CHECK-SD-NEXT: sub v0.8h, v0.8h, v2.8h
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: s_v8i16:
@@ -232,15 +224,12 @@ entry:
define <16 x i16> @s_v16i16(<16 x i16> %a, <16 x i16> %b) {
; CHECK-SD-LABEL: s_v16i16:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: movi v4.8h, #1
+; CHECK-SD-NEXT: cmgt v4.8h, v1.8h, v3.8h
; CHECK-SD-NEXT: cmgt v5.8h, v0.8h, v2.8h
-; CHECK-SD-NEXT: cmgt v6.8h, v1.8h, v3.8h
; CHECK-SD-NEXT: cmgt v0.8h, v2.8h, v0.8h
; CHECK-SD-NEXT: cmgt v1.8h, v3.8h, v1.8h
-; CHECK-SD-NEXT: and v2.16b, v5.16b, v4.16b
-; CHECK-SD-NEXT: and v3.16b, v6.16b, v4.16b
-; CHECK-SD-NEXT: orr v0.16b, v2.16b, v0.16b
-; CHECK-SD-NEXT: orr v1.16b, v3.16b, v1.16b
+; CHECK-SD-NEXT: sub v0.8h, v0.8h, v5.8h
+; CHECK-SD-NEXT: sub v1.8h, v1.8h, v4.8h
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: s_v16i16:
@@ -264,11 +253,9 @@ entry:
define <2 x i32> @s_v2i32(<2 x i32> %a, <2 x i32> %b) {
; CHECK-SD-LABEL: s_v2i32:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: movi v2.2s, #1
-; CHECK-SD-NEXT: cmgt v3.2s, v0.2s, v1.2s
+; CHECK-SD-NEXT: cmgt v2.2s, v0.2s, v1.2s
; CHECK-SD-NEXT: cmgt v0.2s, v1.2s, v0.2s
-; CHECK-SD-NEXT: and v1.8b, v3.8b, v2.8b
-; CHECK-SD-NEXT: orr v0.8b, v1.8b, v0.8b
+; CHECK-SD-NEXT: sub v0.2s, v0.2s, v2.2s
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: s_v2i32:
@@ -288,11 +275,9 @@ entry:
define <4 x i32> @s_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-SD-LABEL: s_v4i32:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: movi v2.4s, #1
-; CHECK-SD-NEXT: cmgt v3.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT: cmgt v2.4s, v0.4s, v1.4s
; CHECK-SD-NEXT: cmgt v0.4s, v1.4s, v0.4s
-; CHECK-SD-NEXT: and v1.16b, v3.16b, v2.16b
-; CHECK-SD-NEXT: orr v0.16b, v1.16b, v0.16b
+; CHECK-SD-NEXT: sub v0.4s, v0.4s, v2.4s
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: s_v4i32:
@@ -312,15 +297,12 @@ entry:
define <8 x i32> @s_v8i32(<8 x i32> %a, <8 x i32> %b) {
; CHECK-SD-LABEL: s_v8i32:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: movi v4.4s, #1
+; CHECK-SD-NEXT: cmgt v4.4s, v1.4s, v3.4s
; CHECK-SD-NEXT: cmgt v5.4s, v0.4s, v2.4s
-; CHECK-SD-NEXT: cmgt v6.4s, v1.4s, v3.4s
; CHECK-SD-NEXT: cmgt v0.4s, v2.4s, v0.4s
; CHECK-SD-NEXT: cmgt v1.4s, v3.4s, v1.4s
-; CHECK-SD-NEXT: and v2.16b, v5.16b, v4.16b
-; CHECK-SD-NEXT: and v3.16b, v6.16b, v4.16b
-; CHECK-SD-NEXT: orr v0.16b, v2.16b, v0.16b
-; CHECK-SD-NEXT: orr v1.16b, v3.16b, v1.16b
+; CHECK-SD-NEXT: sub v0.4s, v0.4s, v5.4s
+; CHECK-SD-NEXT: sub v1.4s, v1.4s, v4.4s
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: s_v8i32:
@@ -344,12 +326,9 @@ entry:
define <2 x i64> @s_v2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-SD-LABEL: s_v2i64:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: mov w8, #1 // =0x1
; CHECK-SD-NEXT: cmgt v2.2d, v0.2d, v1.2d
; CHECK-SD-NEXT: cmgt v0.2d, v1.2d, v0.2d
-; CHECK-SD-NEXT: dup v3.2d, x8
-; CHECK-SD-NEXT: and v1.16b, v2.16b, v3.16b
-; CHECK-SD-NEXT: orr v0.16b, v1.16b, v0.16b
+; CHECK-SD-NEXT: sub v0.2d, v0.2d, v2.2d
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: s_v2i64:
@@ -370,16 +349,12 @@ entry:
define <4 x i64> @s_v4i64(<4 x i64> %a, <4 x i64> %b) {
; CHECK-SD-LABEL: s_v4i64:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: mov w8, #1 // =0x1
-; CHECK-SD-NEXT: cmgt v4.2d, v0.2d, v2.2d
-; CHECK-SD-NEXT: cmgt v6.2d, v1.2d, v3.2d
-; CHECK-SD-NEXT: dup v5.2d, x8
+; CHECK-SD-NEXT: cmgt v4.2d, v1.2d, v3.2d
+; CHECK-SD-NEXT: cmgt v5.2d, v0.2d, v2.2d
; CHECK-SD-NEXT: cmgt v0.2d, v2.2d, v0.2d
; CHECK-SD-NEXT: cmgt v1.2d, v3.2d, v1.2d
-; CHECK-SD-NEXT: and v2.16b, v4.16b, v5.16b
-; CHECK-SD-NEXT: and v3.16b, v6.16b, v5.16b
-; CHECK-SD-NEXT: orr v0.16b, v2.16b, v0.16b
-; CHECK-SD-NEXT: orr v1.16b, v3.16b, v1.16b
+; CHECK-SD-NEXT: sub v0.2d, v0.2d, v5.2d
+; CHECK-SD-NEXT: sub v1.2d, v1.2d, v4.2d
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: s_v4i64:
@@ -404,122 +379,13 @@ entry:
define <16 x i8> @signOf_neon_scmp(<8 x i16> %s0_lo, <8 x i16> %s0_hi, <8 x i16> %s1_lo, <8 x i16> %s1_hi) {
; CHECK-SD-LABEL: signOf_neon_scmp:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: cmgt v5.8h, v0.8h, v2.8h
-; CHECK-SD-NEXT: cmgt v2.8h, v2.8h, v0.8h
; CHECK-SD-NEXT: cmgt v4.8h, v1.8h, v3.8h
; CHECK-SD-NEXT: cmgt v1.8h, v3.8h, v1.8h
-; CHECK-SD-NEXT: umov w8, v5.h[1]
-; CHECK-SD-NEXT: umov w9, v2.h[1]
-; CHECK-SD-NEXT: umov w10, v5.h[0]
-; CHECK-SD-NEXT: umov w11, v2.h[0]
-; CHECK-SD-NEXT: tst w8, #0xffff
-; CHECK-SD-NEXT: cset w8, ne
-; CHECK-SD-NEXT: tst w9, #0xffff
-; CHECK-SD-NEXT: csinv w8, w8, wzr, eq
-; CHECK-SD-NEXT: tst w10, #0xffff
-; CHECK-SD-NEXT: umov w10, v5.h[2]
-; CHECK-SD-NEXT: cset w9, ne
-; CHECK-SD-NEXT: tst w11, #0xffff
-; CHECK-SD-NEXT: umov w11, v2.h[2]
-; CHECK-SD-NEXT: csinv w9, w9, wzr, eq
-; CHECK-SD-NEXT: fmov s0, w9
-; CHECK-SD-NEXT: tst w10, #0xffff
-; CHECK-SD-NEXT: umov w10, v2.h[3]
-; CHECK-SD-NEXT: cset w9, ne
-; CHECK-SD-NEXT: tst w11, #0xffff
-; CHECK-SD-NEXT: mov v0.b[1], w8
-; CHECK-SD-NEXT: umov w8, v5.h[3]
-; CHECK-SD-NEXT: csinv w9, w9, wzr, eq
-; CHECK-SD-NEXT: mov v0.b[2], w9
-; CHECK-SD-NEXT: tst w8, #0xffff
-; CHECK-SD-NEXT: umov w8, v5.h[4]
-; CHECK-SD-NEXT: cset w9, ne
-; CHECK-SD-NEXT: tst w10, #0xffff
-; CHECK-SD-NEXT: umov w10, v2.h[4]
-; CHECK-SD-NEXT: csinv w9, w9, wzr, eq
-; CHECK-SD-NEXT: mov v0.b[3], w9
-; CHECK-SD-NEXT: tst w8, #0xffff
-; CHECK-SD-NEXT: umov w8, v5.h[5]
-; CHECK-SD-NEXT: cset w9, ne
-; CHECK-SD-NEXT: tst w10, #0xffff
-; CHECK-SD-NEXT: umov w10, v2.h[5]
-; CHECK-SD-NEXT: csinv w9, w9, wzr, eq
-; CHECK-SD-NEXT: mov v0.b[4], w9
-; CHECK-SD-NEXT: tst w8, #0xffff
-; CHECK-SD-NEXT: umov w8, v5.h[6]
-; CHECK-SD-NEXT: cset w9, ne
-; CHECK-SD-NEXT: tst w10, #0xffff
-; CHECK-SD-NEXT: umov w10, v2.h[6]
-; CHECK-SD-NEXT: csinv w9, w9, wzr, eq
-; CHECK-SD-NEXT: mov v0.b[5], w9
-; CHECK-SD-NEXT: umov w9, v5.h[7]
-; CHECK-SD-NEXT: tst w8, #0xffff
-; CHECK-SD-NEXT: cset w8, ne
-; CHECK-SD-NEXT: tst w10, #0xffff
-; CHECK-SD-NEXT: umov w10, v2.h[7]
-; CHECK-SD-NEXT: csinv w8, w8, wzr, eq
-; CHECK-SD-NEXT: mov v0.b[6], w8
-; CHECK-SD-NEXT: tst w9, #0xffff
-; CHECK-SD-NEXT: umov w8, v4.h[0]
-; CHECK-SD-NEXT: cset w9, ne
-; CHECK-SD-NEXT: tst w10, #0xffff
-; CHECK-SD-NEXT: umov w10, v1.h[0]
-; CHECK-SD-NEXT: csinv w9, w9, wzr, eq
-; CHECK-SD-NEXT: mov v0.b[7], w9
-; CHECK-SD-NEXT: tst w8, #0xffff
-; CHECK-SD-NEXT: umov w8, v4.h[1]
-; CHECK-SD-NEXT: cset w9, ne
-; CHECK-SD-NEXT: tst w10, #0xffff
-; CHECK-SD-NEXT: umov w10, v1.h[1]
-; CHECK-SD-NEXT: csinv w9, w9, wzr, eq
-; CHECK-SD-NEXT: mov v0.b[8], w9
-; CHECK-SD-NEXT: tst w8, #0xffff
-; CHECK-SD-NEXT: umov w8, v4.h[2]
-; CHECK-SD-NEXT: cset w9, ne
-; CHECK-SD-NEXT: tst w10, #0xffff
-; CHECK-SD-NEXT: umov w10, v1.h[2]
-; CHECK-SD-NEXT: csinv w9, w9, wzr, eq
-; CHECK-SD-NEXT: mov v0.b[9], w9
-; CHECK-SD-NEXT: tst w8, #0xffff
-; CHECK-SD-NEXT: umov w8, v4.h[3]
-; CHECK-SD-NEXT: cset w9, ne
-; CHECK-SD-NEXT: tst w10, #0xffff
-; CHECK-SD-NEXT: umov w10, v1.h[3]
-; CHECK-SD-NEXT: csinv w9, w9, wzr, eq
-; CHECK-SD-NEXT: mov v0.b[10], w9
-; CHECK-SD-NEXT: tst w8, #0xffff
-; CHECK-SD-NEXT: umov w8, v4.h[4]
-; CHECK-SD-NEXT: cset w9, ne
-; CHECK-SD-NEXT: tst w10, #0xffff
-; CHECK-SD-NEXT: umov w10, v1.h[4]
-; CHECK-SD-NEXT: csinv w9, w9, wzr, eq
-; CHECK-SD-NEXT: mov v0.b[11], w9
-; CHECK-SD-NEXT: tst w8, #0xffff
-; CHECK-SD-NEXT: umov w8, v4.h[5]
-; CHECK-SD-NEXT: cset w9, ne
-; CHECK-SD-NEXT: tst w10, #0xffff
-; CHECK-SD-NEXT: umov w10, v1.h[5]
-; CHECK-SD-NEXT: csinv w9, w9, wzr, eq
-; CHECK-SD-NEXT: mov v0.b[12], w9
-; CHECK-SD-NEXT: tst w8, #0xffff
-; CHECK-SD-NEXT: umov w8, v4.h[6]
-; CHECK-SD-NEXT: cset w9, ne
-; CHECK-SD-NEXT: tst w10, #0xffff
-; CHECK-SD-NEXT: umov w10, v1.h[6]
-; CHECK-SD-NEXT: csinv w9, w9, wzr, eq
-; CHECK-SD-NEXT: mov v0.b[13], w9
-; CHECK-SD-NEXT: tst w8, #0xffff
-; CHECK-SD-NEXT: umov w8, v4.h[7]
-; CHECK-SD-NEXT: cset w9, ne
-; CHECK-SD-NEXT: tst w10, #0xffff
-; CHECK-SD-NEXT: umov w10, v1.h[7]
-; CHECK-SD-NEXT: csinv w9, w9, wzr, eq
-; CHECK-SD-NEXT: mov v0.b[14], w9
-; CHECK-SD-NEXT: tst w8, #0xffff
-; CHECK-SD-NEXT: cset w8, ne
-; CHECK-SD-NEXT: tst w10, #0xffff
-; CHECK-SD-NEXT: csinv w8, w8, wzr, eq
-; CHECK-SD-NEXT: mov v0.b[15], w8
+; CHECK-SD-NEXT: cmgt v3.8h, v0.8h, v2.8h
+; CHECK-SD-NEXT: cmgt v0.8h, v2.8h, v0.8h
+; CHECK-SD-NEXT: sub v1.8h, v1.8h, v4.8h
+; CHECK-SD-NEXT: sub v0.8h, v0.8h, v3.8h
+; CHECK-SD-NEXT: uzp1 v0.16b, v0.16b, v1.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: signOf_neon_scmp:
diff --git a/llvm/test/CodeGen/AArch64/ucmp.ll b/llvm/test/CodeGen/AArch64/ucmp.ll
index 7e94cb6c103b52..125ac7f61a41e5 100644
--- a/llvm/test/CodeGen/AArch64/ucmp.ll
+++ b/llvm/test/CodeGen/AArch64/ucmp.ll
@@ -176,11 +176,9 @@ define <1 x i64> @ucmp.1.64.65(<1 x i65> %x, <1 x i65> %y) {
define <8 x i8> @u_v8i8(<8 x i8> %a, <8 x i8> %b) {
; CHECK-SD-LABEL: u_v8i8:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: movi v2.8b, #1
-; CHECK-SD-NEXT: cmhi v3.8b, v0.8b, v1.8b
+; CHECK-SD-NEXT: cmhi v2.8b, v0.8b, v1.8b
; CHECK-SD-NEXT: cmhi v0.8b, v1.8b, v0.8b
-; CHECK-SD-NEXT: and v1.8b, v3.8b, v2.8b
-; CHECK-SD-NEXT: orr v0.8b, v1.8b, v0.8b
+; CHECK-SD-NEXT: sub v0.8b, v0.8b, v2.8b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: u_v8i8:
@@ -200,11 +198,9 @@ entry:
define <16 x i8> @u_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-SD-LABEL: u_v16i8:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: movi v2.16b, #1
-; CHECK-SD-NEXT: cmhi v3.16b, v0.16b, v1.16b
+; CHECK-SD-NEXT: cmhi v2.16b, v0.16b, v1.16b
; CHECK-SD-NEXT: cmhi v0.16b, v1.16b, v0.16b
-; CHECK-SD-NEXT: and v1.16b, v3.16b, v2.16b
-; CHECK-SD-NEXT: orr v0.16b, v1.16b, v0.16b
+; CHECK-SD-NEXT: sub v0.16b, v0.16b, v2.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: u_v16i8:
@@ -224,11 +220,9 @@ entry:
define <4 x i16> @u_v4i16(<4 x i16> %a, <4 x i16> %b) {
; CHECK-SD-LABEL: u_v4i16:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: movi v2.4h, #1
-; CHECK-SD-NEXT: cmhi v3.4h, v0.4h, v1.4h
+; CHECK-SD-NEXT: cmhi v2.4h, v0.4h, v1.4h
; CHECK-SD-NEXT: cmhi v0.4h, v1.4h, v0.4h
-; CHECK-SD-NEXT: and v1.8b, v3.8b, v2.8b
-; CHECK-SD-NEXT: orr v0.8b, v1.8b, v0.8b
+; CHECK-SD-NEXT: sub v0.4h, v0.4h, v2.4h
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: u_v4i16:
@@ -248,11 +242,9 @@ entry:
define <8 x i16> @u_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-SD-LABEL: u_v8i16:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: movi v2.8h, #1
-; CHECK-SD-NEXT: cmhi v3.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: cmhi v2.8h, v0.8h, v1.8h
; CHECK-SD-NEXT: cmhi v0.8h, v1.8h, v0.8h
-; CHECK-SD-NEXT: and v1.16b, v3.16b, v2.16b
-; CHECK-SD-NEXT: orr v0.16b, v1.16b, v0.16b
+; CHECK-SD-NEXT: sub v0.8h, v0.8h, v2.8h
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: u_v8i16:
@@ -272,15 +264,12 @@ entry:
define <16 x i16> @u_v16i16(<16 x i16> %a, <16 x i16> %b) {
; CHECK-SD-LABEL: u_v16i16:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: movi v4.8h, #1
+; CHECK-SD-NEXT: cmhi v4.8h, v1.8h, v3.8h
; CHECK-SD-NEXT: cmhi v5.8h, v0.8h, v2.8h
-; CHECK-SD-NEXT: cmhi v6.8h, v1.8h, v3.8h
; CHECK-SD-NEXT: cmhi v0.8h, v2.8h, v0.8h
; CHECK-SD-NEXT: cmhi v1.8h, v3.8h, v1.8h
-; CHECK-SD-NEXT: and v2.16b, v5.16b, v4.16b
-; CHECK-SD-NEXT: and v3.16b, v6.16b, v4.16b
-; CHECK-SD-NEXT: orr v0.16b, v2.16b, v0.16b
-; CHECK-SD-NEXT: orr v1.16b, v3.16b, v1.16b
+; CHECK-SD-NEXT: sub v0.8h, v0.8h, v5.8h
+; CHECK-SD-NEXT: sub v1.8h, v1.8h, v4.8h
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: u_v16i16:
@@ -304,11 +293,9 @@ entry:
define <2 x i32> @u_v2i32(<2 x i32> %a, <2 x i32> %b) {
; CHECK-SD-LABEL: u_v2i32:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: movi v2.2s, #1
-; CHECK-SD-NEXT: cmhi v3.2s, v0.2s, v1.2s
+; CHECK-SD-NEXT: cmhi v2.2s, v0.2s, v1.2s
; CHECK-SD-NEXT: cmhi v0.2s, v1.2s, v0.2s
-; CHECK-SD-NEXT: and v1.8b, v3.8b, v2.8b
-; CHECK-SD-NEXT: orr v0.8b, v1.8b, v0.8b
+; CHECK-SD-NEXT: sub v0.2s, v0.2s, v2.2s
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: u_v2i32:
@@ -328,11 +315,9 @@ entry:
define <4 x i32> @u_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-SD-LABEL: u_v4i32:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: movi v2.4s, #1
-; CHECK-SD-NEXT: cmhi v3.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT: cmhi v2.4s, v0.4s, v1.4s
; CHECK-SD-NEXT: cmhi v0.4s, v1.4s, v0.4s
-; CHECK-SD-NEXT: and v1.16b, v3.16b, v2.16b
-; CHECK-SD-NEXT: orr v0.16b, v1.16b, v0.16b
+; CHECK-SD-NEXT: sub v0.4s, v0.4s, v2.4s
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: u_v4i32:
@@ -352,15 +337,12 @@ entry:
define <8 x i32> @u_v8i32(<8 x i32> %a, <8 x i32> %b) {
; CHECK-SD-LABEL: u_v8i32:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: movi v4.4s, #1
+; CHECK-SD-NEXT: cmhi v4.4s, v1.4s, v3.4s
; CHECK-SD-NEXT: cmhi v5.4s, v0.4s, v2.4s
-; CHECK-SD-NEXT: cmhi v6.4s, v1.4s, v3.4s
; CHECK-SD-NEXT: cmhi v0.4s, v2.4s, v0.4s
; CHECK-SD-NEXT: cmhi v1.4s, v3.4s, v1.4s
-; CHECK-SD-NEXT: and v2.16b, v5.16b, v4.16b
-; CHECK-SD-NEXT: and v3.16b, v6.16b, v4.16b
-; CHECK-SD-NEXT: orr v0.16b, v2.16b, v0.16b
-; CHECK-SD-NEXT: orr v1.16b, v3.16b, v1.16b
+; CHECK-SD-NEXT: sub v0.4s, v0.4s, v5.4s
+; CHECK-SD-NEXT: sub v1.4s, v1.4s, v4.4s
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: u_v8i32:
@@ -384,12 +366,9 @@ entry:
define <2 x i64> @u_v2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-SD-LABEL: u_v2i64:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: mov w8, #1 // =0x1
; CHECK-SD-NEXT: cmhi v2.2d, v0.2d, v1.2d
; CHECK-SD-NEXT: cmhi v0.2d, v1.2d, v0.2d
-; CHECK-SD-NEXT: dup v3.2d, x8
-; CHECK-SD-NEXT: and v1.16b, v2.16b, v3.16b
-; CHECK-SD-NEXT: orr v0.16b, v1.16b, v0.16b
+; CHECK-SD-NEXT: sub v0.2d, v0.2d, v2.2d
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: u_v2i64:
@@ -410,16 +389,12 @@ entry:
define <4 x i64> @u_v4i64(<4 x i64> %a, <4 x i64> %b) {
; CHECK-SD-LABEL: u_v4i64:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: mov w8, #1 // =0x1
-; CHECK-SD-NEXT: cmhi v4.2d, v0.2d, v2.2d
-; CHECK-SD-NEXT: cmhi v6.2d, v1.2d, v3.2d
-; CHECK-SD-NEXT: dup v5.2d, x8
+; CHECK-SD-NEXT: cmhi v4.2d, v1.2d, v3.2d
+; CHECK-SD-NEXT: cmhi v5.2d, v0.2d, v2.2d
; CHECK-SD-NEXT: cmhi v0.2d, v2.2d, v0.2d
; CHECK-SD-NEXT: cmhi v1.2d, v3.2d, v1.2d
-; CHECK-SD-NEXT: and v2.16b, v4.16b, v5.16b
-; CHECK-SD-NEXT: and v3.16b, v6.16b, v5.16b
-; CHECK-SD-NEXT: orr v0.16b, v2.16b, v0.16b
-; CHECK-SD-NEXT: orr v1.16b, v3.16b, v1.16b
+; CHECK-SD-NEXT: sub v0.2d, v0.2d, v5.2d
+; CHECK-SD-NEXT: sub v1.2d, v1.2d, v4.2d
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: u_v4i64:
@@ -444,122 +419,13 @@ entry:
define <16 x i8> @signOf_neon(<8 x i16> %s0_lo, <8 x i16> %s0_hi, <8 x i16> %s1_lo, <8 x i16> %s1_hi) {
; CHECK-SD-LABEL: signOf_neon:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: cmhi v5.8h, v0.8h, v2.8h
-; CHECK-SD-NEXT: cmhi v2.8h, v2.8h, v0.8h
; CHECK-SD-NEXT: cmhi v4.8h, v1.8h, v3.8h
; CHECK-SD-NEXT: cmhi v1.8h, v3.8h, v1.8h
-; CHECK-SD-NEXT: umov w8, v5.h[1]
-; CHECK-SD-NEXT: umov w9, v2.h[1]
-; CHECK-SD-NEXT: umov w10, v5.h[0]
-; CHECK-SD-NEXT: umov w11, v2.h[0]
-; CHECK-SD-NEXT: tst w8, #0xffff
-; CHECK-SD-NEXT: cset w8, ne
-; CHECK-SD-NEXT: tst w9, #0xffff
-; CHECK-SD-NEXT: csinv w8, w8, wzr, eq
-; CHECK-SD-NEXT: tst w10, #0xffff
-; CHECK-SD-NEXT: umov w10, v5.h[2]
-; CHECK-SD-NEXT: cset w9, ne
-; CHECK-SD-NEXT: tst w11, #0xffff
-; CHECK-SD-NEXT: umov w11, v2.h[2]
-; CHECK-SD-NEXT: csinv w9, w9, wzr, eq
-; CHECK-SD-NEXT: fmov s0, w9
-; CHECK-SD-NEXT: tst w10, #0xffff
-; CHECK-SD-NEXT: umov w10, v2.h[3]
-; CHECK-SD-NEXT: cset w9, ne
-; CHECK-SD-NEXT: tst w11, #0xffff
-; CHECK-SD-NEXT: mov v0.b[1], w8
-; CHECK-SD-NEXT: umov w8, v5.h[3]
-; CHECK-SD-NEXT: csinv w9, w9, wzr, eq
-; CHECK-SD-NEXT: mov v0.b[2], w9
-; CHECK-SD-NEXT: tst w8, #0xffff
-; CHECK-SD-NEXT: umov w8, v5.h[4]
-; CHECK-SD-NEXT: cset w9, ne
-; CHECK-SD-NEXT: tst w10, #0xffff
-; CHECK-SD-NEXT: umov w10, v2.h[4]
-; CHECK-SD-NEXT: csinv w9, w9, wzr, eq
-; CHECK-SD-NEXT: mov v0.b[3], w9
-; CHECK-SD-NEXT: tst w8, #0xffff
-; CHECK-SD-NEXT: umov w8, v5.h[5]
-; CHECK-SD-NEXT: cset w9, ne
-; CHECK-SD-NEXT: tst w10, #0xffff
-; CHECK-SD-NEXT: umov w10, v2.h[5]
-; CHECK-SD-NEXT: csinv w9, w9, wzr, eq
-; CHECK-SD-NEXT: mov v0.b[4], w9
-; CHECK-SD-NEXT: tst w8, #0xffff
-; CHECK-SD-NEXT: umov w8, v5.h[6]
-; CHECK-SD-NEXT: cset w9, ne
-; CHECK-SD-NEXT: tst w10, #0xffff
-; CHECK-SD-NEXT: umov w10, v2.h[6]
-; CHECK-SD-NEXT: csinv w9, w9, wzr, eq
-; CHECK-SD-NEXT: mov v0.b[5], w9
-; CHECK-SD-NEXT: umov w9, v5.h[7]
-; CHECK-SD-NEXT: tst w8, #0xffff
-; CHECK-SD-NEXT: cset w8, ne
-; CHECK-SD-NEXT: tst w10, #0xffff
-; CHECK-SD-NEXT: umov w10, v2.h[7]
-; CHECK-SD-NEXT: csinv w8, w8, wzr, eq
-; CHECK-SD-NEXT: mov v0.b[6], w8
-; CHECK-SD-NEXT: tst w9, #0xffff
-; CHECK-SD-NEXT: umov w8, v4.h[0]
-; CHECK-SD-NEXT: cset w9, ne
-; CHECK-SD-NEXT: tst w10, #0xffff
-; CHECK-SD-NEXT: umov w10, v1.h[0]
-; CHECK-SD-NEXT: csinv w9, w9, wzr, eq
-; CHECK-SD-NEXT: mov v0.b[7], w9
-; CHECK-SD-NEXT: tst w8, #0xffff
-; CHECK-SD-NEXT: umov w8, v4.h[1]
-; CHECK-SD-NEXT: cset w9, ne
-; CHECK-SD-NEXT: tst w10, #0xffff
-; CHECK-SD-NEXT: umov w10, v1.h[1]
-; CHECK-SD-NEXT: csinv w9, w9, wzr, eq
-; CHECK-SD-NEXT: mov v0.b[8], w9
-; CHECK-SD-NEXT: tst w8, #0xffff
-; CHECK-SD-NEXT: umov w8, v4.h[2]
-; CHECK-SD-NEXT: cset w9, ne
-; CHECK-SD-NEXT: tst w10, #0xffff
-; CHECK-SD-NEXT: umov w10, v1.h[2]
-; CHECK-SD-NEXT: csinv w9, w9, wzr, eq
-; CHECK-SD-NEXT: mov v0.b[9], w9
-; CHECK-SD-NEXT: tst w8, #0xffff
-; CHECK-SD-NEXT: umov w8, v4.h[3]
-; CHECK-SD-NEXT: cset w9, ne
-; CHECK-SD-NEXT: tst w10, #0xffff
-; CHECK-SD-NEXT: umov w10, v1.h[3]
-; CHECK-SD-NEXT: csinv w9, w9, wzr, eq
-; CHECK-SD-NEXT: mov v0.b[10], w9
-; CHECK-SD-NEXT: tst w8, #0xffff
-; CHECK-SD-NEXT: umov w8, v4.h[4]
-; CHECK-SD-NEXT: cset w9, ne
-; CHECK-SD-NEXT: tst w10, #0xffff
-; CHECK-SD-NEXT: umov w10, v1.h[4]
-; CHECK-SD-NEXT: csinv w9, w9, wzr, eq
-; CHECK-SD-NEXT: mov v0.b[11], w9
-; CHECK-SD-NEXT: tst w8, #0xffff
-; CHECK-SD-NEXT: umov w8, v4.h[5]
-; CHECK-SD-NEXT: cset w9, ne
-; CHECK-SD-NEXT: tst w10, #0xffff
-; CHECK-SD-NEXT: umov w10, v1.h[5]
-; CHECK-SD-NEXT: csinv w9, w9, wzr, eq
-; CHECK-SD-NEXT: mov v0.b[12], w9
-; CHECK-SD-NEXT: tst w8, #0xffff
-; CHECK-SD-NEXT: umov w8, v4.h[6]
-; CHECK-SD-NEXT: cset w9, ne
-; CHECK-SD-NEXT: tst w10, #0xffff
-; CHECK-SD-NEXT: umov w10, v1.h[6]
-; CHECK-SD-NEXT: csinv w9, w9, wzr, eq
-; CHECK-SD-NEXT: mov v0.b[13], w9
-; CHECK-SD-NEXT: tst w8, #0xffff
-; CHECK-SD-NEXT: umov w8, v4.h[7]
-; CHECK-SD-NEXT: cset w9, ne
-; CHECK-SD-NEXT: tst w10, #0xffff
-; CHECK-SD-NEXT: umov w10, v1.h[7]
-; CHECK-SD-NEXT: csinv w9, w9, wzr, eq
-; CHECK-SD-NEXT: mov v0.b[14], w9
-; CHECK-SD-NEXT: tst w8, #0xffff
-; CHECK-SD-NEXT: cset w8, ne
-; CHECK-SD-NEXT: tst w10, #0xffff
-; CHECK-SD-NEXT: csinv w8, w8, wzr, eq
-; CHECK-SD-NEXT: mov v0.b[15], w8
+; CHECK-SD-NEXT: cmhi v3.8h, v0.8h, v2.8h
+; CHECK-SD-NEXT: cmhi v0.8h, v2.8h, v0.8h
+; CHECK-SD-NEXT: sub v1.8h, v1.8h, v4.8h
+; CHECK-SD-NEXT: sub v0.8h, v0.8h, v3.8h
+; CHECK-SD-NEXT: uzp1 v0.16b, v0.16b, v1.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: signOf_neon:
More information about the llvm-commits
mailing list