[llvm] [GlobalISel] Combine into abd[su] and legalize abd[su] (PR #179437)
Julian Pokrovsky via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 3 03:25:58 PST 2026
https://github.com/raventid created https://github.com/llvm/llvm-project/pull/179437
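This patch adds GlobalISel combines that form G_ABDS/G_ABDU from three patterns: (sub (max a, b), (min a, b)) -> (abd a, b), (select (icmp lt a, b), (sub b, a), (sub a, b)) -> (abd a, b), and (trunc (abs (sub (ext a), (ext b)))) -> (abd a, b). The signed forms (smax/smin, slt, sext) produce abds and the unsigned forms (umax/umin, ult, zext) produce abdu. It also adds the necessary legalizer support for AArch64.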
Resolves https://github.com/llvm/llvm-project/issues/118085
>From a7246906efd0e378c8e84eb8bbb0addd685fcb9d Mon Sep 17 00:00:00 2001
From: raventid <juliankul at gmail.com>
Date: Wed, 28 Jan 2026 18:03:20 +0800
Subject: [PATCH] [GlobalISel] Combine into abd[su]
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
This patch adds a combine for (sub (max a, b), (min a, b)) -> (abd a, b) and (trunc (abs (sub (sext a), (sext b)))) -> (abds a, b). It also adds the necessary legalizer support for AArch64.
Co-authored-by: Thorsten Schütt <schuett at gmail.com>
---
.../llvm/CodeGen/GlobalISel/CombinerHelper.h | 18 +
.../CodeGen/GlobalISel/GenericMachineInstrs.h | 29 +
.../include/llvm/Target/GlobalISel/Combine.td | 68 +-
llvm/lib/CodeGen/GlobalISel/CMakeLists.txt | 1 +
.../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 30 +
.../GlobalISel/CombinerHelperCasts.cpp | 54 ++
.../GlobalISel/CombinerHelperSelect.cpp | 59 ++
.../AArch64/GISel/AArch64LegalizerInfo.cpp | 2 +
.../GlobalISel/legalizer-info-validation.mir | 14 +-
llvm/test/CodeGen/AArch64/abd-combine.ll | 715 +++++++++++++-----
10 files changed, 803 insertions(+), 187 deletions(-)
create mode 100644 llvm/lib/CodeGen/GlobalISel/CombinerHelperSelect.cpp
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 5d4347066a40c..877a242948ccb 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -1050,6 +1050,24 @@ class CombinerHelper {
// (ctlz (or (shl (xor x, (sra x, bitwidth-1)), 1), 1) -> (ctls x)
bool matchCtls(MachineInstr &CtlzMI, BuildFnTy &MatchInfo) const;
+ // trunc(abs(sext(x) - sext(y))) -> abds(x, y)
+ bool matchTruncAbds(const MachineInstr &MI);
+
+ // trunc(abs(zext(x) - zext(y))) -> abdu(x, y)
+ bool matchTruncAbdu(const MachineInstr &MI);
+
+ // select(slt(lhs,rhs),sub(rhs,lhs),sub(lhs,rhs)) -> abds(lhs, rhs)
+ bool matchSelectAbds(const MachineInstr &MI);
+
+ // select(ult(lhs,rhs),sub(rhs,lhs),sub(lhs,rhs)) -> abdu(lhs, rhs)
+ bool matchSelectAbdu(const MachineInstr &MI);
+
+ // sub(smax(lhs,rhs), smin(lhs,rhs)) -> abds(lhs, rhs)
+ bool matchSubAbds(const MachineInstr &MI);
+
+ // sub(umax(lhs,rhs), umin(lhs,rhs)) -> abdu(lhs, rhs)
+ bool matchSubAbdu(const MachineInstr &MI);
+
private:
/// Checks for legality of an indexed variant of \p LdSt.
bool isIndexedLoadStoreLegal(GLoadStore &LdSt) const;
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
index 5faf57fd06228..85bb6ca76f62e 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
@@ -853,6 +853,16 @@ class GFreeze : public GenericMachineInstr {
}
};
+/// Represents an abs.
+class GAbs : public GenericMachineInstr {
+public:
+ Register getSourceReg() const { return getOperand(1).getReg(); }
+
+ static bool classof(const MachineInstr *MI) {
+ return MI->getOpcode() == TargetOpcode::G_ABS;
+ }
+};
+
/// Represents a cast operation.
/// It models the llvm::CastInst concept.
/// The exception is bitcast.
@@ -1028,6 +1038,25 @@ class GSplatVector : public GenericMachineInstr {
};
};
+/// Represents an integer max or min op.
+class GMaxMinOp : public GenericMachineInstr {
+public:
+ Register getLHSReg() const { return getReg(1); }
+ Register getRHSReg() const { return getReg(2); }
+
+ static bool classof(const MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ case TargetOpcode::G_SMAX:
+ case TargetOpcode::G_SMIN:
+ case TargetOpcode::G_UMAX:
+ case TargetOpcode::G_UMIN:
+ return true;
+ default:
+ return false;
+ }
+ };
+};
+
} // namespace llvm
#endif // LLVM_CODEGEN_GLOBALISEL_GENERICMACHINEINSTRS_H
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index dae5809a4e052..d1f14bb9b696e 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -2108,6 +2108,72 @@ def overflow_combines: GICombineGroup<[
match_subo_no_overflow
]>;
+// trunc(abs(sext(x) - sext(y))) -> abds(x, y)
+def trunc_abds : GICombineRule<
+ (defs root:$root),
+ (match (G_SEXT $lhs, $x),
+ (G_SEXT $rhs, $y),
+ (G_SUB $sub, $lhs, $rhs),
+ (G_ABS $abs, $sub),
+ (G_TRUNC $root, $abs):$trunc,
+ [{ return Helper.matchTruncAbds(*${trunc}); }]),
+ (apply (G_ABDS $root, $x, $y))>;
+
+// trunc(abs(zext(x) - zext(y))) -> abdu(x, y)
+def trunc_abdu : GICombineRule<
+ (defs root:$root),
+ (match (G_ZEXT $lhs, $x),
+ (G_ZEXT $rhs, $y),
+ (G_SUB $sub, $lhs, $rhs),
+ (G_ABS $abs, $sub),
+ (G_TRUNC $root, $abs):$trunc,
+ [{ return Helper.matchTruncAbdu(*${trunc}); }]),
+ (apply (G_ABDU $root, $x, $y))>;
+
+def select_abds : GICombineRule<
+ (defs root:$root),
+ (match (G_SUB $lhs, $inputr, $inputl),
+ (G_SUB $rhs, $inputl, $inputr),
+ (G_ICMP $cond, $p, $inputl, $inputr),
+ (G_SELECT $root, $cond, $lhs, $rhs):$select,
+ [{ return ${p}.getPredicate() == CmpInst::ICMP_SLT && Helper.matchSelectAbds(*${select}); }]),
+ (apply (G_ABDS $root, $inputl, $inputr))>;
+
+def select_abdu : GICombineRule<
+ (defs root:$root),
+ (match (G_SUB $lhs, $inputr, $inputl),
+ (G_SUB $rhs, $inputl, $inputr),
+ (G_ICMP $cond, $p, $inputl, $inputr),
+ (G_SELECT $root, $cond, $lhs, $rhs):$select,
+ [{ return ${p}.getPredicate() == CmpInst::ICMP_ULT && Helper.matchSelectAbdu(*${select}); }]),
+ (apply (G_ABDU $root, $inputl, $inputr))>;
+
+def sub_abds : GICombineRule<
+ (defs root:$root),
+ (match (G_SMAX $smax, $inputl, $inputr),
+ (G_SMIN $smin, $inputl, $inputr),
+ (G_SUB $root, $smax, $smin):$sub,
+ [{ return Helper.matchSubAbds(*${sub}); }]),
+ (apply (G_ABDS $root, $inputl, $inputr))>;
+
+def sub_abdu : GICombineRule<
+ (defs root:$root),
+ (match (G_UMAX $umax, $inputl, $inputr),
+ (G_UMIN $umin, $inputl, $inputr),
+ (G_SUB $root, $umax, $umin):$sub,
+ [{ return Helper.matchSubAbdu(*${sub}); }]),
+ (apply (G_ABDU $root, $inputl, $inputr))>;
+
+def abd_su_combines: GICombineGroup<[
+ trunc_abds,
+ trunc_abdu,
+ select_abds,
+ select_abdu,
+ sub_abds,
+ sub_abdu
+]>;
+
+
// FIXME: These should use the custom predicate feature once it lands.
def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
undef_to_negative_one,
@@ -2175,7 +2241,7 @@ def shuffle_combines : GICombineGroup<[combine_shuffle_concat,
def all_combines : GICombineGroup<[integer_reassoc_combines, trivial_combines,
vector_ops_combines, freeze_combines, cast_combines,
insert_vec_elt_combines, extract_vec_elt_combines, combines_for_extload,
- combine_extracted_vector_load,
+ combine_extracted_vector_load, abd_su_combines,
undef_combines, identity_combines, phi_combines,
simplify_add_to_sub, hoist_logic_op_with_same_opcode_hands, shifts_too_big,
reassocs, ptr_add_immed_chain, cmp_combines,
diff --git a/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt b/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt
index 27b6ea745921a..e3e49321923fd 100644
--- a/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt
+++ b/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt
@@ -9,6 +9,7 @@ add_llvm_component_library(LLVMGlobalISel
CombinerHelperArtifacts.cpp
CombinerHelperCasts.cpp
CombinerHelperCompares.cpp
+ CombinerHelperSelect.cpp
CombinerHelperVectorOps.cpp
GIMatchTableExecutor.cpp
GISelChangeObserver.cpp
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 08696f08284b1..7f304e51aeb7f 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -8598,4 +8598,34 @@ bool CombinerHelper::matchCtls(MachineInstr &CtlzMI,
};
return true;
+// sub(smax(lhs,rhs), smin(lhs,rhs)) -> abds(lhs, rhs)
+bool CombinerHelper::matchSubAbds(const MachineInstr &MI) {
+ const GSub *Sub = cast<GSub>(&MI);
+ const GMaxMinOp *LHS = cast<GMaxMinOp>(MRI.getVRegDef(Sub->getLHSReg()));
+ const GMaxMinOp *RHS = cast<GMaxMinOp>(MRI.getVRegDef(Sub->getRHSReg()));
+
+ if (!MRI.hasOneNonDBGUse(LHS->getReg(0)) ||
+ !MRI.hasOneNonDBGUse(RHS->getReg(0)))
+ return false;
+
+ Register Dst = Sub->getReg(0);
+ LLT DstTy = MRI.getType(Dst);
+
+ return isLegalOrBeforeLegalizer({TargetOpcode::G_ABDS, {DstTy}});
+}
+
+// sub(umax(lhs,rhs), umin(lhs,rhs)) -> abdu(lhs, rhs)
+bool CombinerHelper::matchSubAbdu(const MachineInstr &MI) {
+ const GSub *Sub = cast<GSub>(&MI);
+ const GMaxMinOp *LHS = cast<GMaxMinOp>(MRI.getVRegDef(Sub->getLHSReg()));
+ const GMaxMinOp *RHS = cast<GMaxMinOp>(MRI.getVRegDef(Sub->getRHSReg()));
+
+ if (!MRI.hasOneNonDBGUse(LHS->getReg(0)) ||
+ !MRI.hasOneNonDBGUse(RHS->getReg(0)))
+ return false;
+
+ Register Dst = Sub->getReg(0);
+ LLT DstTy = MRI.getType(Dst);
+
+ return isLegalOrBeforeLegalizer({TargetOpcode::G_ABDU, {DstTy}});
}
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp
index fc7e3ae5b7942..e0b6d826c9697 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp
@@ -412,4 +412,58 @@ bool CombinerHelper::matchRedundantSextInReg(MachineInstr &Root,
}
return true;
+// trunc(abs(sext(x) - sext(y))) -> abds(x, y)
+bool CombinerHelper::matchTruncAbds(const MachineInstr &MI) {
+ const GTrunc *Trunc = cast<GTrunc>(&MI);
+ const GAbs *Abs = cast<GAbs>(MRI.getVRegDef(Trunc->getSrcReg()));
+ const GSub *Sub = cast<GSub>(MRI.getVRegDef(Abs->getSourceReg()));
+
+ Register Dst = Trunc->getReg(0);
+ LLT DstTy = MRI.getType(Dst);
+
+ GSext *SextLHS = cast<GSext>(MRI.getVRegDef(Sub->getLHSReg()));
+ GSext *SextRHS = cast<GSext>(MRI.getVRegDef(Sub->getRHSReg()));
+
+ LLT SextLHSTy = MRI.getType(SextLHS->getSrcReg());
+ LLT SextRHSTy = MRI.getType(SextRHS->getSrcReg());
+
+ if (SextLHSTy != SextRHSTy || DstTy != SextLHSTy)
+ return false;
+
+ // The abs, the sub, and both extended sub operands must each have exactly one non-debug use.
+ if (!MRI.hasOneNonDBGUse(Abs->getReg(0)) ||
+ !MRI.hasOneNonDBGUse(Sub->getReg(0)) ||
+ !MRI.hasOneNonDBGUse(Sub->getLHSReg()) ||
+ !MRI.hasOneNonDBGUse(Sub->getRHSReg()))
+ return false;
+
+ return isLegalOrBeforeLegalizer({TargetOpcode::G_ABDS, {DstTy}});
+}
+
+// trunc(abs(zext(x) - zext(y))) -> abdu(x, y)
+bool CombinerHelper::matchTruncAbdu(const MachineInstr &MI) {
+ const GTrunc *Trunc = cast<GTrunc>(&MI);
+ const GAbs *Abs = cast<GAbs>(MRI.getVRegDef(Trunc->getSrcReg()));
+ const GSub *Sub = cast<GSub>(MRI.getVRegDef(Abs->getSourceReg()));
+
+ Register Dst = Trunc->getReg(0);
+ LLT DstTy = MRI.getType(Dst);
+
+ GZext *ZextLHS = cast<GZext>(MRI.getVRegDef(Sub->getLHSReg()));
+ GZext *ZextRHS = cast<GZext>(MRI.getVRegDef(Sub->getRHSReg()));
+
+ LLT ZextLHSTy = MRI.getType(ZextLHS->getSrcReg());
+ LLT ZextRHSTy = MRI.getType(ZextRHS->getSrcReg());
+
+ if (ZextLHSTy != ZextRHSTy || DstTy != ZextLHSTy)
+ return false;
+
+ // The abs, the sub, and both extended sub operands must each have exactly one non-debug use.
+ if (!MRI.hasOneNonDBGUse(Abs->getReg(0)) ||
+ !MRI.hasOneNonDBGUse(Sub->getReg(0)) ||
+ !MRI.hasOneNonDBGUse(Sub->getLHSReg()) ||
+ !MRI.hasOneNonDBGUse(Sub->getRHSReg()))
+ return false;
+
+ return isLegalOrBeforeLegalizer({TargetOpcode::G_ABDU, {DstTy}});
}
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelperSelect.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelperSelect.cpp
new file mode 100644
index 0000000000000..01125a8312e48
--- /dev/null
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelperSelect.cpp
@@ -0,0 +1,59 @@
+//===- CombinerHelperSelect.cpp--------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements CombinerHelper for G_SELECT.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/LowLevelTypeUtils.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/Support/Casting.h"
+
+#define DEBUG_TYPE "gi-combiner"
+
+using namespace llvm;
+
+// select(slt(lhs,rhs),sub(rhs,lhs),sub(lhs,rhs)) -> abds(lhs, rhs)
+bool CombinerHelper::matchSelectAbds(const MachineInstr &MI) {
+ const GSelect *Select = cast<GSelect>(&MI);
+ GSub *LHS = cast<GSub>(MRI.getVRegDef(Select->getTrueReg()));
+ GSub *RHS = cast<GSub>(MRI.getVRegDef(Select->getFalseReg()));
+
+ if (!MRI.hasOneNonDBGUse(Select->getCondReg()) ||
+ !MRI.hasOneNonDBGUse(LHS->getReg(0)) ||
+ !MRI.hasOneNonDBGUse(RHS->getReg(0)))
+ return false;
+
+ Register Dst = Select->getReg(0);
+ LLT DstTy = MRI.getType(Dst);
+
+ return isLegalOrBeforeLegalizer({TargetOpcode::G_ABDS, {DstTy}});
+}
+
+// select(ult(lhs,rhs),sub(rhs,lhs),sub(lhs,rhs)) -> abdu(lhs, rhs)
+bool CombinerHelper::matchSelectAbdu(const MachineInstr &MI) {
+ const GSelect *Select = cast<GSelect>(&MI);
+ GSub *LHS = cast<GSub>(MRI.getVRegDef(Select->getTrueReg()));
+ GSub *RHS = cast<GSub>(MRI.getVRegDef(Select->getFalseReg()));
+
+ if (!MRI.hasOneNonDBGUse(Select->getCondReg()) ||
+ !MRI.hasOneNonDBGUse(LHS->getReg(0)) ||
+ !MRI.hasOneNonDBGUse(RHS->getReg(0)))
+ return false;
+
+ Register Dst = Select->getReg(0);
+ LLT DstTy = MRI.getType(Dst);
+
+ return isLegalOrBeforeLegalizer({TargetOpcode::G_ABDU, {DstTy}});
+}
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 131c72a24964a..bc5b6136397a0 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -1439,6 +1439,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder({G_SCMP, G_UCMP}).lower();
+ getActionDefinitionsBuilder({G_ABDS, G_ABDU})
+ .legalFor({{v8s8}, {v4s16}, {v2s32}, {v16s8}, {v8s16}, {v4s32}});
getLegacyLegalizerInfo().computeTables();
verify(*ST.getInstrInfo());
}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
index 655e793f55c59..af4f916d96978 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
@@ -70,14 +70,14 @@
# DEBUG-NEXT: .. the first uncovered type index: 1, OK
# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
#
-# DEBUG-NEXT: G_ABDS (opcode [[G_ABDS:[0-9]+]]): 1 type index, 0 imm indices
-# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
-# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
+# DEBUG-NEXT: G_ABDS (opcode {{[0-9]+}}): 1 type index, 0 imm indices
+# DEBUG-NEXT: .. the first uncovered type index: 1, OK
+# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
#
-# DEBUG-NEXT: G_ABDU (opcode [[G_ABDU:[0-9]+]]): 1 type index, 0 imm indices
-# DEBUG-NEXT: .. opcode [[G_ABDU]] is aliased to [[G_ABDS]]
-# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
-# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
+# DEBUG-NEXT: G_ABDU (opcode {{[0-9]+}}): 1 type index, 0 imm indices
+# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
+# DEBUG-NEXT: .. the first uncovered type index: 1, OK
+# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
#
# DEBUG-NEXT: G_UAVGFLOOR (opcode {{[0-9]+}}): 1 type index, 0 imm indices
# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
diff --git a/llvm/test/CodeGen/AArch64/abd-combine.ll b/llvm/test/CodeGen/AArch64/abd-combine.ll
index cdb40ceb46b1e..ff5ecc8fd2ce8 100644
--- a/llvm/test/CodeGen/AArch64/abd-combine.ll
+++ b/llvm/test/CodeGen/AArch64/abd-combine.ll
@@ -1,11 +1,21 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=aarch64 | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
define <8 x i16> @abdu_base(<8 x i16> %src1, <8 x i16> %src2) {
-; CHECK-LABEL: abdu_base:
-; CHECK: // %bb.0:
-; CHECK-NEXT: uabd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abdu_base:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: uabd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abdu_base:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: usubl v2.4s, v0.4h, v1.4h
+; CHECK-GI-NEXT: usubl2 v0.4s, v0.8h, v1.8h
+; CHECK-GI-NEXT: abs v1.4s, v2.4s
+; CHECK-GI-NEXT: abs v0.4s, v0.4s
+; CHECK-GI-NEXT: uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT: ret
%zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
%zextsrc2 = zext <8 x i16> %src2 to <8 x i32>
%sub = sub <8 x i32> %zextsrc1, %zextsrc2
@@ -15,12 +25,24 @@ define <8 x i16> @abdu_base(<8 x i16> %src1, <8 x i16> %src2) {
}
define <8 x i16> @abdu_const(<8 x i16> %src1) {
-; CHECK-LABEL: abdu_const:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v1.4h, #1
-; CHECK-NEXT: mov v1.d[1], v1.d[0]
-; CHECK-NEXT: uabd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abdu_const:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v1.4h, #1
+; CHECK-SD-NEXT: mov v1.d[1], v1.d[0]
+; CHECK-SD-NEXT: uabd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abdu_const:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v1.4s, #1
+; CHECK-GI-NEXT: ushll v2.4s, v0.4h, #0
+; CHECK-GI-NEXT: ushll2 v0.4s, v0.8h, #0
+; CHECK-GI-NEXT: sub v2.4s, v2.4s, v1.4s
+; CHECK-GI-NEXT: sub v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT: abs v1.4s, v2.4s
+; CHECK-GI-NEXT: abs v0.4s, v0.4s
+; CHECK-GI-NEXT: uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT: ret
%zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
%sub = sub <8 x i32> %zextsrc1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
@@ -29,12 +51,22 @@ define <8 x i16> @abdu_const(<8 x i16> %src1) {
}
define <8 x i16> @abdu_const_lhs(<8 x i16> %src1) {
-; CHECK-LABEL: abdu_const_lhs:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v1.4h, #1
-; CHECK-NEXT: mov v1.d[1], v1.d[0]
-; CHECK-NEXT: uabd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abdu_const_lhs:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v1.4h, #1
+; CHECK-SD-NEXT: mov v1.d[1], v1.d[0]
+; CHECK-SD-NEXT: uabd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abdu_const_lhs:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v1.4s, #1
+; CHECK-GI-NEXT: usubw v2.4s, v1.4s, v0.4h
+; CHECK-GI-NEXT: usubw2 v0.4s, v1.4s, v0.8h
+; CHECK-GI-NEXT: abs v1.4s, v2.4s
+; CHECK-GI-NEXT: abs v0.4s, v0.4s
+; CHECK-GI-NEXT: uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT: ret
%zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
%sub = sub <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, %zextsrc1
%abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
@@ -43,9 +75,20 @@ define <8 x i16> @abdu_const_lhs(<8 x i16> %src1) {
}
define <8 x i16> @abdu_const_zero(<8 x i16> %src1) {
-; CHECK-LABEL: abdu_const_zero:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abdu_const_zero:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abdu_const_zero:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v1.2d, #0000000000000000
+; CHECK-GI-NEXT: ushll v2.4s, v0.4h, #0
+; CHECK-GI-NEXT: neg v2.4s, v2.4s
+; CHECK-GI-NEXT: usubw2 v0.4s, v1.4s, v0.8h
+; CHECK-GI-NEXT: abs v1.4s, v2.4s
+; CHECK-GI-NEXT: abs v0.4s, v0.4s
+; CHECK-GI-NEXT: uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT: ret
%zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
%sub = sub <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, %zextsrc1
%abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
@@ -54,10 +97,17 @@ define <8 x i16> @abdu_const_zero(<8 x i16> %src1) {
}
define <8 x i16> @abdu_const_both() {
-; CHECK-LABEL: abdu_const_both:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v0.8h, #2
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abdu_const_both:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v0.8h, #2
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abdu_const_both:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v0.4s, #2
+; CHECK-GI-NEXT: abs v0.4s, v0.4s
+; CHECK-GI-NEXT: uzp1 v0.8h, v0.8h, v0.8h
+; CHECK-GI-NEXT: ret
%sub = sub <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
%result = trunc <8 x i32> %abs to <8 x i16>
@@ -65,10 +115,19 @@ define <8 x i16> @abdu_const_both() {
}
define <8 x i16> @abdu_const_bothhigh() {
-; CHECK-LABEL: abdu_const_bothhigh:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v0.8h, #1
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abdu_const_bothhigh:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v0.8h, #1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abdu_const_bothhigh:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi d0, #0xffffffffffffffff
+; CHECK-GI-NEXT: mvni v1.4h, #1
+; CHECK-GI-NEXT: usubl v0.4s, v1.4h, v0.4h
+; CHECK-GI-NEXT: abs v0.4s, v0.4s
+; CHECK-GI-NEXT: uzp1 v0.8h, v0.8h, v0.8h
+; CHECK-GI-NEXT: ret
%zextsrc1 = zext <8 x i16> <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534> to <8 x i32>
%zextsrc2 = zext <8 x i16> <i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535> to <8 x i32>
%sub = sub <8 x i32> %zextsrc1, %zextsrc2
@@ -78,9 +137,18 @@ define <8 x i16> @abdu_const_bothhigh() {
}
define <8 x i16> @abdu_undef(<8 x i16> %src1) {
-; CHECK-LABEL: abdu_undef:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abdu_undef:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abdu_undef:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ushll v1.4s, v0.4h, #0
+; CHECK-GI-NEXT: ushll2 v0.4s, v0.8h, #0
+; CHECK-GI-NEXT: abs v1.4s, v1.4s
+; CHECK-GI-NEXT: abs v0.4s, v0.4s
+; CHECK-GI-NEXT: uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT: ret
%zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
%zextsrc2 = zext <8 x i16> undef to <8 x i32>
%sub = sub <8 x i32> %zextsrc1, %zextsrc2
@@ -90,10 +158,18 @@ define <8 x i16> @abdu_undef(<8 x i16> %src1) {
}
define <8 x i16> @abdu_ugt(<8 x i16>, <8 x i16>) {
-; CHECK-LABEL: abdu_ugt:
-; CHECK: // %bb.0:
-; CHECK-NEXT: uabd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abdu_ugt:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: uabd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abdu_ugt:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: cmhi v2.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: sub v3.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: sub v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT: bit v0.16b, v3.16b, v2.16b
+; CHECK-GI-NEXT: ret
%3 = icmp ugt <8 x i16> %0, %1
%4 = sub <8 x i16> %0, %1
%5 = sub <8 x i16> %1, %0
@@ -102,10 +178,18 @@ define <8 x i16> @abdu_ugt(<8 x i16>, <8 x i16>) {
}
define <8 x i16> @abdu_uge(<8 x i16>, <8 x i16>) {
-; CHECK-LABEL: abdu_uge:
-; CHECK: // %bb.0:
-; CHECK-NEXT: uabd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abdu_uge:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: uabd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abdu_uge:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: cmhs v2.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: sub v3.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: sub v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT: bit v0.16b, v3.16b, v2.16b
+; CHECK-GI-NEXT: ret
%3 = icmp uge <8 x i16> %0, %1
%4 = sub <8 x i16> %0, %1
%5 = sub <8 x i16> %1, %0
@@ -114,10 +198,18 @@ define <8 x i16> @abdu_uge(<8 x i16>, <8 x i16>) {
}
define <8 x i16> @abdu_ult(<8 x i16>, <8 x i16>) {
-; CHECK-LABEL: abdu_ult:
-; CHECK: // %bb.0:
-; CHECK-NEXT: uabd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abdu_ult:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: uabd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abdu_ult:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: cmhi v2.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT: sub v3.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: sub v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT: bif v0.16b, v3.16b, v2.16b
+; CHECK-GI-NEXT: ret
%3 = icmp ult <8 x i16> %0, %1
%4 = sub <8 x i16> %0, %1
%5 = sub <8 x i16> %1, %0
@@ -126,10 +218,18 @@ define <8 x i16> @abdu_ult(<8 x i16>, <8 x i16>) {
}
define <8 x i16> @abdu_ule(<8 x i16>, <8 x i16>) {
-; CHECK-LABEL: abdu_ule:
-; CHECK: // %bb.0:
-; CHECK-NEXT: uabd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abdu_ule:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: uabd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abdu_ule:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: cmhs v2.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT: sub v3.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: sub v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT: bif v0.16b, v3.16b, v2.16b
+; CHECK-GI-NEXT: ret
%3 = icmp ule <8 x i16> %0, %1
%4 = sub <8 x i16> %0, %1
%5 = sub <8 x i16> %1, %0
@@ -138,10 +238,18 @@ define <8 x i16> @abdu_ule(<8 x i16>, <8 x i16>) {
}
define <8 x i16> @abds_sgt(<8 x i16>, <8 x i16>) {
-; CHECK-LABEL: abds_sgt:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sabd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abds_sgt:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sabd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abds_sgt:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: cmgt v2.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: sub v3.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: sub v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT: bit v0.16b, v3.16b, v2.16b
+; CHECK-GI-NEXT: ret
%3 = icmp sgt <8 x i16> %0, %1
%4 = sub <8 x i16> %0, %1
%5 = sub <8 x i16> %1, %0
@@ -150,10 +258,18 @@ define <8 x i16> @abds_sgt(<8 x i16>, <8 x i16>) {
}
define <8 x i16> @abds_sge(<8 x i16>, <8 x i16>) {
-; CHECK-LABEL: abds_sge:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sabd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abds_sge:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sabd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abds_sge:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: cmge v2.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: sub v3.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: sub v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT: bit v0.16b, v3.16b, v2.16b
+; CHECK-GI-NEXT: ret
%3 = icmp sge <8 x i16> %0, %1
%4 = sub <8 x i16> %0, %1
%5 = sub <8 x i16> %1, %0
@@ -162,10 +278,18 @@ define <8 x i16> @abds_sge(<8 x i16>, <8 x i16>) {
}
define <8 x i16> @abds_slt(<8 x i16>, <8 x i16>) {
-; CHECK-LABEL: abds_slt:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sabd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abds_slt:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sabd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abds_slt:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: cmgt v2.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT: sub v3.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: sub v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT: bif v0.16b, v3.16b, v2.16b
+; CHECK-GI-NEXT: ret
%3 = icmp slt <8 x i16> %0, %1
%4 = sub <8 x i16> %0, %1
%5 = sub <8 x i16> %1, %0
@@ -174,10 +298,18 @@ define <8 x i16> @abds_slt(<8 x i16>, <8 x i16>) {
}
define <8 x i16> @abds_sle(<8 x i16>, <8 x i16>) {
-; CHECK-LABEL: abds_sle:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sabd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abds_sle:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sabd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abds_sle:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: cmge v2.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT: sub v3.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: sub v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT: bif v0.16b, v3.16b, v2.16b
+; CHECK-GI-NEXT: ret
%3 = icmp sle <8 x i16> %0, %1
%4 = sub <8 x i16> %0, %1
%5 = sub <8 x i16> %1, %0
@@ -206,75 +338,128 @@ define <8 x i16> @abdu_i_const(<8 x i16> %src1) {
}
define <8 x i16> @abdu_i_const_lhs(<8 x i16> %src1) {
-; CHECK-LABEL: abdu_i_const_lhs:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v1.8h, #1
-; CHECK-NEXT: uabd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abdu_i_const_lhs:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v1.8h, #1
+; CHECK-SD-NEXT: uabd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abdu_i_const_lhs:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v1.8h, #1
+; CHECK-GI-NEXT: uabd v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT: ret
%result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <8 x i16> %src1)
ret <8 x i16> %result
}
define <8 x i16> @abdu_i_const_zero(float %t, <8 x i16> %src1) {
-; CHECK-LABEL: abdu_i_const_zero:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov v0.16b, v1.16b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abdu_i_const_zero:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: mov v0.16b, v1.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abdu_i_const_zero:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v0.2d, #0000000000000000
+; CHECK-GI-NEXT: uabd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: ret
%result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <8 x i16> %src1)
ret <8 x i16> %result
}
define <8 x i16> @abdu_i_const_both() {
-; CHECK-LABEL: abdu_i_const_both:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v0.8h, #2
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abdu_i_const_both:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v0.8h, #2
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abdu_i_const_both:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v0.8h, #3
+; CHECK-GI-NEXT: movi v1.8h, #1
+; CHECK-GI-NEXT: uabd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: ret
%result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
ret <8 x i16> %result
}
define <8 x i16> @abdu_i_const_bothhigh() {
-; CHECK-LABEL: abdu_i_const_bothhigh:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v0.8h, #1
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abdu_i_const_bothhigh:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v0.8h, #1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abdu_i_const_bothhigh:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff
+; CHECK-GI-NEXT: mvni v1.8h, #1
+; CHECK-GI-NEXT: uabd v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT: ret
%result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534>, <8 x i16> <i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535>)
ret <8 x i16> %result
}
define <8 x i16> @abdu_i_const_onehigh() {
-; CHECK-LABEL: abdu_i_const_onehigh:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #32765 // =0x7ffd
-; CHECK-NEXT: dup v0.8h, w8
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abdu_i_const_onehigh:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: mov w8, #32765 // =0x7ffd
+; CHECK-SD-NEXT: dup v0.8h, w8
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abdu_i_const_onehigh:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v0.8h, #1
+; CHECK-GI-NEXT: adrp x8, .LCPI21_0
+; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI21_0]
+; CHECK-GI-NEXT: uabd v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT: ret
%result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
ret <8 x i16> %result
}
define <8 x i16> @abdu_i_const_oneneg() {
-; CHECK-LABEL: abdu_i_const_oneneg:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v0.8h, #128, lsl #8
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abdu_i_const_oneneg:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v0.8h, #128, lsl #8
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abdu_i_const_oneneg:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: adrp x8, .LCPI22_0
+; CHECK-GI-NEXT: mvni v0.8h, #1
+; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI22_0]
+; CHECK-GI-NEXT: uabd v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT: ret
%result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>, <8 x i16> <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534>)
ret <8 x i16> %result
}
define <8 x i16> @abdu_i_zero(<8 x i16> %t, <8 x i16> %src1) {
-; CHECK-LABEL: abdu_i_zero:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov v0.16b, v1.16b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abdu_i_zero:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: mov v0.16b, v1.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abdu_i_zero:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v0.2d, #0000000000000000
+; CHECK-GI-NEXT: uabd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: ret
%result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <8 x i16> %src1)
ret <8 x i16> %result
}
define <8 x i16> @abdu_i_undef(<8 x i16> %t, <8 x i16> %src1) {
-; CHECK-LABEL: abdu_i_undef:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v0.2d, #0000000000000000
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abdu_i_undef:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v0.2d, #0000000000000000
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abdu_i_undef:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: uabd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: ret
%result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> undef, <8 x i16> %src1)
ret <8 x i16> %result
}
@@ -297,10 +482,19 @@ define <8 x i16> @abdu_i_reassoc(<8 x i16> %src1) {
define <8 x i16> @abds_base(<8 x i16> %src1, <8 x i16> %src2) {
-; CHECK-LABEL: abds_base:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sabd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abds_base:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sabd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abds_base:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ssubl v2.4s, v0.4h, v1.4h
+; CHECK-GI-NEXT: ssubl2 v0.4s, v0.8h, v1.8h
+; CHECK-GI-NEXT: abs v1.4s, v2.4s
+; CHECK-GI-NEXT: abs v0.4s, v0.4s
+; CHECK-GI-NEXT: uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT: ret
%zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
%zextsrc2 = sext <8 x i16> %src2 to <8 x i32>
%sub = sub <8 x i32> %zextsrc1, %zextsrc2
@@ -310,12 +504,24 @@ define <8 x i16> @abds_base(<8 x i16> %src1, <8 x i16> %src2) {
}
define <8 x i16> @abds_const(<8 x i16> %src1) {
-; CHECK-LABEL: abds_const:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v1.4h, #1
-; CHECK-NEXT: mov v1.d[1], v1.d[0]
-; CHECK-NEXT: sabd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abds_const:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v1.4h, #1
+; CHECK-SD-NEXT: mov v1.d[1], v1.d[0]
+; CHECK-SD-NEXT: sabd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abds_const:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v1.4s, #1
+; CHECK-GI-NEXT: sshll v2.4s, v0.4h, #0
+; CHECK-GI-NEXT: sshll2 v0.4s, v0.8h, #0
+; CHECK-GI-NEXT: sub v2.4s, v2.4s, v1.4s
+; CHECK-GI-NEXT: sub v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT: abs v1.4s, v2.4s
+; CHECK-GI-NEXT: abs v0.4s, v0.4s
+; CHECK-GI-NEXT: uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT: ret
%zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
%sub = sub <8 x i32> %zextsrc1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
@@ -324,12 +530,22 @@ define <8 x i16> @abds_const(<8 x i16> %src1) {
}
define <8 x i16> @abds_const_lhs(<8 x i16> %src1) {
-; CHECK-LABEL: abds_const_lhs:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v1.4h, #1
-; CHECK-NEXT: mov v1.d[1], v1.d[0]
-; CHECK-NEXT: sabd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abds_const_lhs:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v1.4h, #1
+; CHECK-SD-NEXT: mov v1.d[1], v1.d[0]
+; CHECK-SD-NEXT: sabd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abds_const_lhs:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v1.4s, #1
+; CHECK-GI-NEXT: ssubw v2.4s, v1.4s, v0.4h
+; CHECK-GI-NEXT: ssubw2 v0.4s, v1.4s, v0.8h
+; CHECK-GI-NEXT: abs v1.4s, v2.4s
+; CHECK-GI-NEXT: abs v0.4s, v0.4s
+; CHECK-GI-NEXT: uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT: ret
%zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
%sub = sub <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, %zextsrc1
%abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
@@ -338,13 +554,24 @@ define <8 x i16> @abds_const_lhs(<8 x i16> %src1) {
}
define <8 x i16> @abds_const_zero(<8 x i16> %src1) {
-; CHECK-LABEL: abds_const_zero:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: abs v0.4h, v0.4h
-; CHECK-NEXT: abs v1.4h, v1.4h
-; CHECK-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abds_const_zero:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT: abs v0.4h, v0.4h
+; CHECK-SD-NEXT: abs v1.4h, v1.4h
+; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abds_const_zero:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v1.2d, #0000000000000000
+; CHECK-GI-NEXT: sshll v2.4s, v0.4h, #0
+; CHECK-GI-NEXT: neg v2.4s, v2.4s
+; CHECK-GI-NEXT: ssubw2 v0.4s, v1.4s, v0.8h
+; CHECK-GI-NEXT: abs v1.4s, v2.4s
+; CHECK-GI-NEXT: abs v0.4s, v0.4s
+; CHECK-GI-NEXT: uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT: ret
%zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
%sub = sub <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, %zextsrc1
%abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
@@ -353,10 +580,17 @@ define <8 x i16> @abds_const_zero(<8 x i16> %src1) {
}
define <8 x i16> @abds_const_both() {
-; CHECK-LABEL: abds_const_both:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v0.8h, #2
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abds_const_both:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v0.8h, #2
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abds_const_both:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v0.4s, #2
+; CHECK-GI-NEXT: abs v0.4s, v0.4s
+; CHECK-GI-NEXT: uzp1 v0.8h, v0.8h, v0.8h
+; CHECK-GI-NEXT: ret
%sub = sub <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
%result = trunc <8 x i32> %abs to <8 x i16>
@@ -364,10 +598,19 @@ define <8 x i16> @abds_const_both() {
}
define <8 x i16> @abds_const_bothhigh() {
-; CHECK-LABEL: abds_const_bothhigh:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v0.8h, #1
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abds_const_bothhigh:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v0.8h, #1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abds_const_bothhigh:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi d0, #0xffffffffffffffff
+; CHECK-GI-NEXT: mvni v1.4h, #1
+; CHECK-GI-NEXT: ssubl v0.4s, v1.4h, v0.4h
+; CHECK-GI-NEXT: abs v0.4s, v0.4s
+; CHECK-GI-NEXT: uzp1 v0.8h, v0.8h, v0.8h
+; CHECK-GI-NEXT: ret
%zextsrc1 = sext <8 x i16> <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534> to <8 x i32>
%zextsrc2 = sext <8 x i16> <i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535> to <8 x i32>
%sub = sub <8 x i32> %zextsrc1, %zextsrc2
@@ -377,14 +620,23 @@ define <8 x i16> @abds_const_bothhigh() {
}
define <8 x i16> @abds_undef(<8 x i16> %src1) {
-; CHECK-LABEL: abds_undef:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sshll v1.4s, v0.4h, #0
-; CHECK-NEXT: sshll2 v0.4s, v0.8h, #0
-; CHECK-NEXT: abs v0.4s, v0.4s
-; CHECK-NEXT: abs v1.4s, v1.4s
-; CHECK-NEXT: uzp1 v0.8h, v1.8h, v0.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abds_undef:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sshll v1.4s, v0.4h, #0
+; CHECK-SD-NEXT: sshll2 v0.4s, v0.8h, #0
+; CHECK-SD-NEXT: abs v0.4s, v0.4s
+; CHECK-SD-NEXT: abs v1.4s, v1.4s
+; CHECK-SD-NEXT: uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abds_undef:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sshll v1.4s, v0.4h, #0
+; CHECK-GI-NEXT: sshll2 v0.4s, v0.8h, #0
+; CHECK-GI-NEXT: abs v1.4s, v1.4s
+; CHECK-GI-NEXT: abs v0.4s, v0.4s
+; CHECK-GI-NEXT: uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT: ret
%zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
%zextsrc2 = sext <8 x i16> undef to <8 x i32>
%sub = sub <8 x i32> %zextsrc1, %zextsrc2
@@ -415,75 +667,129 @@ define <8 x i16> @abds_i_const(<8 x i16> %src1) {
}
define <8 x i16> @abds_i_const_lhs(<8 x i16> %src1) {
-; CHECK-LABEL: abds_i_const_lhs:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v1.8h, #1
-; CHECK-NEXT: sabd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abds_i_const_lhs:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v1.8h, #1
+; CHECK-SD-NEXT: sabd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abds_i_const_lhs:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v1.8h, #1
+; CHECK-GI-NEXT: sabd v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT: ret
%result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <8 x i16> %src1)
ret <8 x i16> %result
}
define <8 x i16> @abds_i_const_zero(<8 x i16> %src1) {
-; CHECK-LABEL: abds_i_const_zero:
-; CHECK: // %bb.0:
-; CHECK-NEXT: abs v0.8h, v0.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abds_i_const_zero:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: abs v0.8h, v0.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abds_i_const_zero:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v1.2d, #0000000000000000
+; CHECK-GI-NEXT: sabd v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT: ret
%result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <8 x i16> %src1)
ret <8 x i16> %result
}
define <8 x i16> @abds_i_const_both() {
-; CHECK-LABEL: abds_i_const_both:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v0.8h, #2
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abds_i_const_both:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v0.8h, #2
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abds_i_const_both:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v0.8h, #3
+; CHECK-GI-NEXT: movi v1.8h, #1
+; CHECK-GI-NEXT: sabd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: ret
%result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
ret <8 x i16> %result
}
define <8 x i16> @abds_i_const_bothhigh() {
-; CHECK-LABEL: abds_i_const_bothhigh:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v0.8h, #1
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abds_i_const_bothhigh:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v0.8h, #1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abds_i_const_bothhigh:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: adrp x8, .LCPI38_0
+; CHECK-GI-NEXT: mvni v0.8h, #128, lsl #8
+; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI38_0]
+; CHECK-GI-NEXT: sabd v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT: ret
%result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>, <8 x i16> <i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767>)
ret <8 x i16> %result
}
define <8 x i16> @abds_i_const_onehigh() {
-; CHECK-LABEL: abds_i_const_onehigh:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #32765 // =0x7ffd
-; CHECK-NEXT: dup v0.8h, w8
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abds_i_const_onehigh:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: mov w8, #32765 // =0x7ffd
+; CHECK-SD-NEXT: dup v0.8h, w8
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abds_i_const_onehigh:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v0.8h, #1
+; CHECK-GI-NEXT: adrp x8, .LCPI39_0
+; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI39_0]
+; CHECK-GI-NEXT: sabd v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT: ret
%result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
ret <8 x i16> %result
}
define <8 x i16> @abds_i_const_oneneg() {
-; CHECK-LABEL: abds_i_const_oneneg:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v0.8h, #128, lsl #8
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abds_i_const_oneneg:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v0.8h, #128, lsl #8
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abds_i_const_oneneg:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: adrp x8, .LCPI40_0
+; CHECK-GI-NEXT: mvni v0.8h, #1
+; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI40_0]
+; CHECK-GI-NEXT: sabd v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT: ret
%result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>, <8 x i16> <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534>)
ret <8 x i16> %result
}
define <8 x i16> @abds_i_zero(<8 x i16> %t, <8 x i16> %src1) {
-; CHECK-LABEL: abds_i_zero:
-; CHECK: // %bb.0:
-; CHECK-NEXT: abs v0.8h, v1.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abds_i_zero:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: abs v0.8h, v1.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abds_i_zero:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v0.2d, #0000000000000000
+; CHECK-GI-NEXT: sabd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: ret
%result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <8 x i16> %src1)
ret <8 x i16> %result
}
define <8 x i16> @abds_i_undef(<8 x i16> %t, <8 x i16> %src1) {
-; CHECK-LABEL: abds_i_undef:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v0.2d, #0000000000000000
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abds_i_undef:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v0.2d, #0000000000000000
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abds_i_undef:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sabd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: ret
%result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> undef, <8 x i16> %src1)
ret <8 x i16> %result
}
@@ -502,14 +808,28 @@ define <8 x i16> @abds_i_reassoc(<8 x i16> %src1) {
}
define <1 x i64> @recursive() {
-; CHECK-LABEL: recursive:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v0.8b, #254
-; CHECK-NEXT: ushll v1.8h, v0.8b, #0
-; CHECK-NEXT: dup v0.8b, v0.b[0]
-; CHECK-NEXT: saddlp v1.1d, v1.2s
-; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: recursive:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v0.8b, #254
+; CHECK-SD-NEXT: ushll v1.8h, v0.8b, #0
+; CHECK-SD-NEXT: dup v0.8b, v0.b[0]
+; CHECK-SD-NEXT: saddlp v1.1d, v1.2s
+; CHECK-SD-NEXT: orr v0.8b, v0.8b, v1.8b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: recursive:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi d0, #0xffffffffffffffff
+; CHECK-GI-NEXT: movi v1.8b, #1
+; CHECK-GI-NEXT: uabd v2.8b, v0.8b, v1.8b
+; CHECK-GI-NEXT: uabdl v0.8h, v0.8b, v1.8b
+; CHECK-GI-NEXT: dup v1.16b, v2.b[0]
+; CHECK-GI-NEXT: saddlp v0.1d, v0.2s
+; CHECK-GI-NEXT: fmov x8, d1
+; CHECK-GI-NEXT: fmov x9, d0
+; CHECK-GI-NEXT: orr x8, x8, x9
+; CHECK-GI-NEXT: fmov d0, x8
+; CHECK-GI-NEXT: ret
%1 = tail call <8 x i8> @llvm.aarch64.neon.umax.v8i8(<8 x i8> zeroinitializer, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
%2 = tail call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %1, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
%3 = zext <8 x i8> %2 to <8 x i16>
@@ -523,6 +843,43 @@ define <1 x i64> @recursive() {
ret <1 x i64> %10
}
+define <8 x i16> @abds_sub_smax(<8 x i16> %src1, <8 x i16> %src2) {
+; CHECK-SD-LABEL: abds_sub_smax:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sabd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abds_sub_smax:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: smax v2.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: smin v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: sub v0.8h, v2.8h, v0.8h
+; CHECK-GI-NEXT: ret
+ %smax = call <8 x i16> @llvm.smax(<8 x i16> %src1, <8 x i16> %src2)
+ %smin = call <8 x i16> @llvm.smin(<8 x i16> %src1, <8 x i16> %src2)
+ %result = sub <8 x i16> %smax, %smin
+ ret <8 x i16> %result
+}
+
+define <8 x i16> @abdu_sub_umax(<8 x i16> %src1, <8 x i16> %src2) {
+; CHECK-SD-LABEL: abdu_sub_umax:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: uabd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abdu_sub_umax:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: umax v2.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: umin v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: sub v0.8h, v2.8h, v0.8h
+; CHECK-GI-NEXT: ret
+ %umax = call <8 x i16> @llvm.umax(<8 x i16> %src1, <8 x i16> %src2)
+ %umin = call <8 x i16> @llvm.umin(<8 x i16> %src1, <8 x i16> %src2)
+ %result = sub <8 x i16> %umax, %umin
+ ret <8 x i16> %result
+}
+
+
declare <8 x i8> @llvm.aarch64.neon.umax.v8i8(<8 x i8>, <8 x i8>)
declare <1 x i64> @llvm.aarch64.neon.saddlp.v1i64.v2i32(<2 x i32>)
declare <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8>, <8 x i8>)
More information about the llvm-commits
mailing list