[llvm] [GlobalISel] Combine into abd[su] and legalize abd[su] (PR #118865)
Thorsten Schütt via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 5 13:12:04 PST 2024
https://github.com/tschuett updated https://github.com/llvm/llvm-project/pull/118865
>From cbda0d749d78057484248123000320440b283ada Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= <schuett at gmail.com>
Date: Sat, 30 Nov 2024 16:42:14 +0100
Subject: [PATCH 1/4] [GlobalISel] Combine into abd[su]
PowerPC, AArch64, and ARM have abd[su] instructions. The other targets lower them.
We reverse those lowerings as combines.
RISCV lowering:
// abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
// abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
X86 lowering:
// abds(lhs, rhs) -> select(slt(lhs,rhs),sub(rhs,lhs),sub(lhs,rhs))
// abdu(lhs, rhs) -> select(ult(lhs,rhs),sub(rhs,lhs),sub(lhs,rhs))
// i.e. trunc(abs(sext(Op0) - sext(Op1))) becomes abds(Op0, Op1)
// or trunc(abs(zext(Op0) - zext(Op1))) becomes abdu(Op0, Op1)
---
.../llvm/CodeGen/GlobalISel/CombinerHelper.h | 20 +
.../CodeGen/GlobalISel/GenericMachineInstrs.h | 29 +
.../include/llvm/Target/GlobalISel/Combine.td | 66 +-
.../Target/GlobalISel/SelectionDAGCompat.td | 2 +
llvm/lib/CodeGen/GlobalISel/CMakeLists.txt | 1 +
.../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 46 ++
.../GlobalISel/CombinerHelperCasts.cpp | 70 ++
.../GlobalISel/CombinerHelperSelect.cpp | 74 ++
.../AArch64/GISel/AArch64LegalizerInfo.cpp | 3 +
llvm/test/CodeGen/AArch64/abd-combine.ll | 671 +++++++++++++-----
10 files changed, 792 insertions(+), 190 deletions(-)
create mode 100644 llvm/lib/CodeGen/GlobalISel/CombinerHelperSelect.cpp
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 55c3b72c8e027f..e2b63f1f179bd3 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -942,6 +942,26 @@ class CombinerHelper {
// overflow sub
bool matchSuboCarryOut(const MachineInstr &MI, BuildFnTy &MatchInfo);
+ // trunc(abs(sext(x) - sext(y))) -> abds(x, y)
+ bool matchTruncAbds(const MachineInstr &MI,
+ BuildFnTy &MatchInfo);
+
+ // trunc(abs(zext(x) - zext(y))) -> abdu(x, y)
+ bool matchTruncAbdu(const MachineInstr &MI,
+ BuildFnTy &MatchInfo);
+
+ // select(slt(lhs,rhs),sub(rhs,lhs),sub(lhs,rhs)) -> abds(lhs, rhs)
+ bool matchSelectAbds(const MachineInstr &MI, BuildFnTy &MatchInfo);
+
+ // select(ult(lhs,rhs),sub(rhs,lhs),sub(lhs,rhs)) -> abdu(lhs, rhs)
+ bool matchSelectAbdu(const MachineInstr &MI, BuildFnTy &MatchInfo);
+
+ // sub(smax(lhs,rhs), smin(lhs,rhs)) -> abds(lhs, rhs)
+ bool matchSubAbds(const MachineInstr &MI, BuildFnTy &MatchInfo);
+
+ // sub(umax(lhs,rhs), umin(lhs,rhs)) -> abdu(lhs, rhs)
+ bool matchSubAbdu(const MachineInstr &MI, BuildFnTy &MatchInfo);
+
private:
/// Checks for legality of an indexed variant of \p LdSt.
bool isIndexedLoadStoreLegal(GLoadStore &LdSt) const;
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
index 9e5d4d34f24d2b..a99db10832c5a9 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
@@ -850,6 +850,16 @@ class GFreeze : public GenericMachineInstr {
}
};
+/// Represents a G_ABS; operand 1 is its source register.
+class GAbs : public GenericMachineInstr {
+public:
+ Register getSourceReg() const { return getOperand(1).getReg(); }
+
+ static bool classof(const MachineInstr *MI) {
+ return MI->getOpcode() == TargetOpcode::G_ABS;
+ }
+};
+
/// Represents a cast operation.
/// It models the llvm::CastInst concept.
/// The exception is bitcast.
@@ -1022,6 +1032,25 @@ class GSplatVector : public GenericMachineInstr {
};
};
+/// Represents an integer max or min op: G_SMAX, G_SMIN, G_UMAX or G_UMIN.
+class GMMaxMinOp : public GenericMachineInstr {
+public:
+  /// First source operand (operand 1).
+  Register getLHSReg() const { return getReg(1); }
+  /// Second source operand (operand 2).
+  Register getRHSReg() const { return getReg(2); }
+
+  static bool classof(const MachineInstr *MI) {
+    switch (MI->getOpcode()) {
+    case TargetOpcode::G_SMAX:
+    case TargetOpcode::G_SMIN:
+    case TargetOpcode::G_UMAX:
+    case TargetOpcode::G_UMIN:
+      return true;
+    default:
+      return false;
+    }
+  }
+};
+
} // namespace llvm
#endif // LLVM_CODEGEN_GLOBALISEL_GENERICMACHINEINSTRS_H
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index b0c63fc7c7b806..bd24bfb0587211 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1913,6 +1913,70 @@ def overflow_combines: GICombineGroup<[
match_subo_no_overflow
]>;
+// trunc(abs(sub(sext(x), sext(y)))); further checks are in matchTruncAbds.
+def trunc_abds : GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$matchinfo),
+ (match (G_SEXT $lhs, $x),
+ (G_SEXT $rhs, $y),
+ (G_SUB $sub, $lhs, $rhs),
+ (G_ABS $abs, $sub),
+ (G_TRUNC $root, $abs):$trunc,
+ [{ return Helper.matchTruncAbds(*${trunc}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFn(*${trunc}, ${matchinfo}); }])>;
+
+// trunc(abs(sub(zext(x), zext(y)))); further checks are in matchTruncAbdu.
+def trunc_abdu : GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$matchinfo),
+ (match (G_ZEXT $lhs, $x),
+ (G_ZEXT $rhs, $y),
+ (G_SUB $sub, $lhs, $rhs),
+ (G_ABS $abs, $sub),
+ (G_TRUNC $root, $abs):$trunc,
+ [{ return Helper.matchTruncAbdu(*${trunc}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFn(*${trunc}, ${matchinfo}); }])>;
+
+// select(icmp slt(l, r), sub(r, l), sub(l, r)); the predicate is checked below.
+def select_abds : GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$matchinfo),
+ (match (G_SUB $lhs, $inputr, $inputl),
+ (G_SUB $rhs, $inputl, $inputr),
+ (G_ICMP $cond, $p, $inputl, $inputr),
+ (G_SELECT $root, $cond, $lhs, $rhs):$select,
+ [{ return ${p}.getPredicate() == CmpInst::ICMP_SLT && Helper.matchSelectAbds(*${select}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFn(*${select}, ${matchinfo}); }])>;
+
+// select(icmp ult(l, r), sub(r, l), sub(l, r)); the predicate is checked below.
+def select_abdu : GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$matchinfo),
+ (match (G_SUB $lhs, $inputr, $inputl),
+ (G_SUB $rhs, $inputl, $inputr),
+ (G_ICMP $cond, $p, $inputl, $inputr),
+ (G_SELECT $root, $cond, $lhs, $rhs):$select,
+ [{ return ${p}.getPredicate() == CmpInst::ICMP_ULT && Helper.matchSelectAbdu(*${select}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFn(*${select}, ${matchinfo}); }])>;
+
+// sub(smax(l, r), smin(l, r)); further checks are in matchSubAbds.
+def sub_abds : GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$matchinfo),
+ (match (G_SMAX $smax, $inputl, $inputr),
+ (G_SMIN $smin, $inputl, $inputr),
+ (G_SUB $root, $smax, $smin):$sub,
+ [{ return Helper.matchSubAbds(*${sub}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFn(*${sub}, ${matchinfo}); }])>;
+
+// sub(umax(l, r), umin(l, r)); further checks are in matchSubAbdu.
+def sub_abdu : GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$matchinfo),
+ (match (G_UMAX $umax, $inputl, $inputr),
+ (G_UMIN $umin, $inputl, $inputr),
+ (G_SUB $root, $umax, $umin):$sub,
+ [{ return Helper.matchSubAbdu(*${sub}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFn(*${sub}, ${matchinfo}); }])>;
+
+// Groups the trunc-, select- and sub-rooted abds/abdu rules.
+def abd_su_combines: GICombineGroup<[
+ trunc_abds,
+ trunc_abdu,
+ select_abds,
+ select_abdu,
+ sub_abds,
+ sub_abdu
+]>;
+
+
// FIXME: These should use the custom predicate feature once it lands.
def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
undef_to_negative_one,
@@ -1979,7 +2043,7 @@ def shuffle_combines : GICombineGroup<[combine_shuffle_concat,
def all_combines : GICombineGroup<[integer_reassoc_combines, trivial_combines,
vector_ops_combines, freeze_combines, cast_combines,
insert_vec_elt_combines, extract_vec_elt_combines, combines_for_extload,
- combine_extracted_vector_load,
+ combine_extracted_vector_load, abd_su_combines,
undef_combines, identity_combines, phi_combines,
simplify_add_to_sub, hoist_logic_op_with_same_opcode_hands, shifts_too_big,
reassocs, ptr_add_immed_chain, cmp_combines,
diff --git a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
index 2148f5be4c41aa..807d7195b811cb 100644
--- a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
+++ b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
@@ -76,6 +76,8 @@ def : GINodeEquiv<G_XOR, xor>;
def : GINodeEquiv<G_SHL, shl>;
def : GINodeEquiv<G_LSHR, srl>;
def : GINodeEquiv<G_ASHR, sra>;
+def : GINodeEquiv<G_ABDS, abds>;
+def : GINodeEquiv<G_ABDU, abdu>;
def : GINodeEquiv<G_SADDSAT, saddsat>;
def : GINodeEquiv<G_UADDSAT, uaddsat>;
def : GINodeEquiv<G_SSUBSAT, ssubsat>;
diff --git a/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt b/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt
index a45024d120be68..9d9aa7af1e3961 100644
--- a/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt
+++ b/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt
@@ -9,6 +9,7 @@ add_llvm_component_library(LLVMGlobalISel
CombinerHelperArtifacts.cpp
CombinerHelperCasts.cpp
CombinerHelperCompares.cpp
+ CombinerHelperSelect.cpp
CombinerHelperVectorOps.cpp
GIMatchTableExecutor.cpp
GISelChangeObserver.cpp
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index d95fc8cfbcf558..79ce5e92d5dc38 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -7865,3 +7865,49 @@ bool CombinerHelper::matchSuboCarryOut(const MachineInstr &MI,
return false;
}
+
+// sub(smax(lhs,rhs), smin(lhs,rhs)) -> abds(lhs, rhs)
+//
+// \p MI is the root G_SUB; both of its operands must be defined by max/min
+// ops (the casts below assert this). Returns true and sets \p MatchInfo to a
+// builder that emits G_ABDS into the G_SUB's destination register.
+bool CombinerHelper::matchSubAbds(const MachineInstr &MI,
+                                  BuildFnTy &MatchInfo) {
+  const GSub *Sub = cast<GSub>(&MI);
+  const GMMaxMinOp *Max = cast<GMMaxMinOp>(MRI.getVRegDef(Sub->getLHSReg()));
+  // The min is the subtrahend, i.e. the RHS operand of the G_SUB.
+  const GMMaxMinOp *Min = cast<GMMaxMinOp>(MRI.getVRegDef(Sub->getRHSReg()));
+
+  // Bail out unless the max and the min each have exactly one non-debug use.
+  if (!MRI.hasOneNonDBGUse(Max->getReg(0)) ||
+      !MRI.hasOneNonDBGUse(Min->getReg(0)))
+    return false;
+
+  Register Dst = Sub->getReg(0);
+  LLT DstTy = MRI.getType(Dst);
+
+  if (!isLegalOrBeforeLegalizer({TargetOpcode::G_ABDS, {DstTy}}))
+    return false;
+
+  // Capture plain registers by value instead of instruction pointers.
+  Register LHS = Max->getLHSReg();
+  Register RHS = Max->getRHSReg();
+  MatchInfo = [=](MachineIRBuilder &B) { B.buildAbds(Dst, LHS, RHS); };
+  return true;
+}
+
+// sub(umax(lhs,rhs), umin(lhs,rhs)) -> abdu(lhs, rhs)
+//
+// \p MI is the root G_SUB; both of its operands must be defined by max/min
+// ops (the casts below assert this). Returns true and sets \p MatchInfo to a
+// builder that emits G_ABDU into the G_SUB's destination register.
+bool CombinerHelper::matchSubAbdu(const MachineInstr &MI,
+                                  BuildFnTy &MatchInfo) {
+  const GSub *Sub = cast<GSub>(&MI);
+  const GMMaxMinOp *Max = cast<GMMaxMinOp>(MRI.getVRegDef(Sub->getLHSReg()));
+  // The min is the subtrahend, i.e. the RHS operand of the G_SUB.
+  const GMMaxMinOp *Min = cast<GMMaxMinOp>(MRI.getVRegDef(Sub->getRHSReg()));
+
+  // Bail out unless the max and the min each have exactly one non-debug use.
+  if (!MRI.hasOneNonDBGUse(Max->getReg(0)) ||
+      !MRI.hasOneNonDBGUse(Min->getReg(0)))
+    return false;
+
+  Register Dst = Sub->getReg(0);
+  LLT DstTy = MRI.getType(Dst);
+
+  if (!isLegalOrBeforeLegalizer({TargetOpcode::G_ABDU, {DstTy}}))
+    return false;
+
+  // Capture plain registers by value instead of instruction pointers.
+  Register LHS = Max->getLHSReg();
+  Register RHS = Max->getRHSReg();
+  MatchInfo = [=](MachineIRBuilder &B) { B.buildAbdu(Dst, LHS, RHS); };
+  return true;
+}
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp
index 30557e6a2304e6..16421ea088e9b5 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp
@@ -359,3 +359,73 @@ bool CombinerHelper::matchCastOfInteger(const MachineInstr &CastMI,
return false;
}
}
+
+// trunc(abs(sext(x) - sext(y))) -> abds(x, y)
+//
+// \p MI is the root G_TRUNC. The match requires x, y and the truncated result
+// to have the same type, and the abs, the sub and both extends to have exactly
+// one non-debug use. Returns true and sets \p MatchInfo to a builder that
+// emits G_ABDS into the G_TRUNC's destination register.
+bool CombinerHelper::matchTruncAbds(const MachineInstr &MI,
+                                    BuildFnTy &MatchInfo) {
+  const GTrunc *Trunc = cast<GTrunc>(&MI);
+  const GAbs *Abs = cast<GAbs>(MRI.getVRegDef(Trunc->getSrcReg()));
+  const GSub *Sub = cast<GSub>(MRI.getVRegDef(Abs->getSourceReg()));
+
+  Register Dst = Trunc->getReg(0);
+  LLT DstTy = MRI.getType(Dst);
+
+  const GSext *SextLHS = cast<GSext>(MRI.getVRegDef(Sub->getLHSReg()));
+  const GSext *SextRHS = cast<GSext>(MRI.getVRegDef(Sub->getRHSReg()));
+
+  // The un-extended inputs; these become the operands of the abds.
+  Register X = SextLHS->getSrcReg();
+  Register Y = SextRHS->getSrcReg();
+
+  LLT XTy = MRI.getType(X);
+  LLT YTy = MRI.getType(Y);
+
+  if (XTy != YTy || DstTy != XTy)
+    return false;
+
+  // one-use
+  if (!MRI.hasOneNonDBGUse(Abs->getReg(0)) ||
+      !MRI.hasOneNonDBGUse(Sub->getReg(0)) ||
+      !MRI.hasOneNonDBGUse(Sub->getLHSReg()) ||
+      !MRI.hasOneNonDBGUse(Sub->getRHSReg()))
+    return false;
+
+  if (!isLegalOrBeforeLegalizer({TargetOpcode::G_ABDS, {DstTy}}))
+    return false;
+
+  // Pass both inputs: abds(x, y), not abds(x, x).
+  MatchInfo = [=](MachineIRBuilder &B) { B.buildAbds(Dst, X, Y); };
+  return true;
+}
+
+// trunc(abs(zext(x) - zext(y))) -> abdu(x, y)
+//
+// \p MI is the root G_TRUNC. The match requires x, y and the truncated result
+// to have the same type, and the abs, the sub and both extends to have exactly
+// one non-debug use. Returns true and sets \p MatchInfo to a builder that
+// emits G_ABDU into the G_TRUNC's destination register.
+bool CombinerHelper::matchTruncAbdu(const MachineInstr &MI,
+                                    BuildFnTy &MatchInfo) {
+  const GTrunc *Trunc = cast<GTrunc>(&MI);
+  const GAbs *Abs = cast<GAbs>(MRI.getVRegDef(Trunc->getSrcReg()));
+  const GSub *Sub = cast<GSub>(MRI.getVRegDef(Abs->getSourceReg()));
+
+  Register Dst = Trunc->getReg(0);
+  LLT DstTy = MRI.getType(Dst);
+
+  const GZext *ZextLHS = cast<GZext>(MRI.getVRegDef(Sub->getLHSReg()));
+  const GZext *ZextRHS = cast<GZext>(MRI.getVRegDef(Sub->getRHSReg()));
+
+  // The un-extended inputs; these become the operands of the abdu.
+  Register X = ZextLHS->getSrcReg();
+  Register Y = ZextRHS->getSrcReg();
+
+  LLT XTy = MRI.getType(X);
+  LLT YTy = MRI.getType(Y);
+
+  if (XTy != YTy || DstTy != XTy)
+    return false;
+
+  // one-use
+  if (!MRI.hasOneNonDBGUse(Abs->getReg(0)) ||
+      !MRI.hasOneNonDBGUse(Sub->getReg(0)) ||
+      !MRI.hasOneNonDBGUse(Sub->getLHSReg()) ||
+      !MRI.hasOneNonDBGUse(Sub->getRHSReg()))
+    return false;
+
+  if (!isLegalOrBeforeLegalizer({TargetOpcode::G_ABDU, {DstTy}}))
+    return false;
+
+  // Pass both inputs: abdu(x, y), not abdu(x, x).
+  MatchInfo = [=](MachineIRBuilder &B) { B.buildAbdu(Dst, X, Y); };
+  return true;
+}
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelperSelect.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelperSelect.cpp
new file mode 100644
index 00000000000000..9b40008e8fb87c
--- /dev/null
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelperSelect.cpp
@@ -0,0 +1,74 @@
+//===- CombinerHelperSelect.cpp
+//--------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements CombinerHelper for G_SELECT.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/LowLevelTypeUtils.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/Support/Casting.h"
+
+#define DEBUG_TYPE "gi-combiner"
+
+using namespace llvm;
+
+// select(slt(lhs,rhs),sub(rhs,lhs),sub(lhs,rhs)) -> abds(lhs, rhs)
+//
+// \p MI is the root G_SELECT; its true and false values must be defined by
+// G_SUBs (the casts below assert this). On success \p MatchInfo emits G_ABDS
+// into the select's destination register.
+bool CombinerHelper::matchSelectAbds(const MachineInstr &MI,
+                                     BuildFnTy &MatchInfo) {
+  const GSelect *Sel = cast<GSelect>(&MI);
+  GSub *TrueSub = cast<GSub>(MRI.getVRegDef(Sel->getTrueReg()));
+  GSub *FalseSub = cast<GSub>(MRI.getVRegDef(Sel->getFalseReg()));
+
+  // The condition and both subtractions need exactly one non-debug use.
+  const bool AllSingleUse = MRI.hasOneNonDBGUse(Sel->getCondReg()) &&
+                            MRI.hasOneNonDBGUse(TrueSub->getReg(0)) &&
+                            MRI.hasOneNonDBGUse(FalseSub->getReg(0));
+  if (!AllSingleUse)
+    return false;
+
+  Register Dst = Sel->getReg(0);
+  LLT DstTy = MRI.getType(Dst);
+  const bool CanUseAbds =
+      isLegalOrBeforeLegalizer({TargetOpcode::G_ABDS, {DstTy}});
+  if (!CanUseAbds)
+    return false;
+
+  // The abds operands are taken from the false-side sub, in its order.
+  MatchInfo = [=](MachineIRBuilder &B) {
+    B.buildAbds(Dst, FalseSub->getLHSReg(), FalseSub->getRHSReg());
+  };
+  return true;
+}
+
+// select(ult(lhs,rhs),sub(rhs,lhs),sub(lhs,rhs)) -> abdu(lhs, rhs)
+//
+// \p MI is the root G_SELECT; its true and false values must be defined by
+// G_SUBs (the casts below assert this). On success \p MatchInfo emits G_ABDU
+// into the select's destination register.
+bool CombinerHelper::matchSelectAbdu(const MachineInstr &MI,
+                                     BuildFnTy &MatchInfo) {
+  const GSelect *Sel = cast<GSelect>(&MI);
+  GSub *TrueSub = cast<GSub>(MRI.getVRegDef(Sel->getTrueReg()));
+  GSub *FalseSub = cast<GSub>(MRI.getVRegDef(Sel->getFalseReg()));
+
+  // The condition and both subtractions need exactly one non-debug use.
+  const bool AllSingleUse = MRI.hasOneNonDBGUse(Sel->getCondReg()) &&
+                            MRI.hasOneNonDBGUse(TrueSub->getReg(0)) &&
+                            MRI.hasOneNonDBGUse(FalseSub->getReg(0));
+  if (!AllSingleUse)
+    return false;
+
+  Register Dst = Sel->getReg(0);
+  LLT DstTy = MRI.getType(Dst);
+  const bool CanUseAbdu =
+      isLegalOrBeforeLegalizer({TargetOpcode::G_ABDU, {DstTy}});
+  if (!CanUseAbdu)
+    return false;
+
+  // The abdu operands are taken from the false-side sub, in its order.
+  MatchInfo = [=](MachineIRBuilder &B) {
+    B.buildAbdu(Dst, FalseSub->getLHSReg(), FalseSub->getRHSReg());
+  };
+  return true;
+}
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 619a041c273cd8..7c980692f217b1 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -1344,6 +1344,9 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder(G_SPLAT_VECTOR)
.legalFor(HasSVE, {{nxv4s32, s32}, {nxv2s64, s64}});
+ getActionDefinitionsBuilder({G_ABDS, G_ABDU})
+ .legalFor({{v8s8}, {v4s16}, {v2s32}, {v16s8}, {v8s16}, {v4s32}});
+
getLegacyLegalizerInfo().computeTables();
verify(*ST.getInstrInfo());
}
diff --git a/llvm/test/CodeGen/AArch64/abd-combine.ll b/llvm/test/CodeGen/AArch64/abd-combine.ll
index e48680f4be98b6..5442bea27441d1 100644
--- a/llvm/test/CodeGen/AArch64/abd-combine.ll
+++ b/llvm/test/CodeGen/AArch64/abd-combine.ll
@@ -1,11 +1,19 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=aarch64 | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for recursive
define <8 x i16> @abdu_base(<8 x i16> %src1, <8 x i16> %src2) {
-; CHECK-LABEL: abdu_base:
-; CHECK: // %bb.0:
-; CHECK-NEXT: uabd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abdu_base:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: uabd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abdu_base:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: uabd v0.8h, v0.8h, v0.8h
+; CHECK-GI-NEXT: ret
%zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
%zextsrc2 = zext <8 x i16> %src2 to <8 x i32>
%sub = sub <8 x i32> %zextsrc1, %zextsrc2
@@ -15,17 +23,29 @@ define <8 x i16> @abdu_base(<8 x i16> %src1, <8 x i16> %src2) {
}
define <8 x i16> @abdu_const(<8 x i16> %src1) {
-; CHECK-LABEL: abdu_const:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v1.4s, #1
-; CHECK-NEXT: ushll2 v2.4s, v0.8h, #0
-; CHECK-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: sub v1.4s, v2.4s, v1.4s
-; CHECK-NEXT: abs v1.4s, v1.4s
-; CHECK-NEXT: abs v0.4s, v0.4s
-; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abdu_const:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v1.4s, #1
+; CHECK-SD-NEXT: ushll2 v2.4s, v0.8h, #0
+; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT: sub v0.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT: sub v1.4s, v2.4s, v1.4s
+; CHECK-SD-NEXT: abs v1.4s, v1.4s
+; CHECK-SD-NEXT: abs v0.4s, v0.4s
+; CHECK-SD-NEXT: uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abdu_const:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v1.4s, #1
+; CHECK-GI-NEXT: ushll v2.4s, v0.4h, #0
+; CHECK-GI-NEXT: ushll2 v0.4s, v0.8h, #0
+; CHECK-GI-NEXT: sub v2.4s, v2.4s, v1.4s
+; CHECK-GI-NEXT: sub v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT: abs v1.4s, v2.4s
+; CHECK-GI-NEXT: abs v0.4s, v0.4s
+; CHECK-GI-NEXT: uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT: ret
%zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
%sub = sub <8 x i32> %zextsrc1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
@@ -34,15 +54,25 @@ define <8 x i16> @abdu_const(<8 x i16> %src1) {
}
define <8 x i16> @abdu_const_lhs(<8 x i16> %src1) {
-; CHECK-LABEL: abdu_const_lhs:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v1.4s, #1
-; CHECK-NEXT: usubw v2.4s, v1.4s, v0.4h
-; CHECK-NEXT: usubw2 v0.4s, v1.4s, v0.8h
-; CHECK-NEXT: abs v0.4s, v0.4s
-; CHECK-NEXT: abs v1.4s, v2.4s
-; CHECK-NEXT: uzp1 v0.8h, v1.8h, v0.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abdu_const_lhs:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v1.4s, #1
+; CHECK-SD-NEXT: usubw v2.4s, v1.4s, v0.4h
+; CHECK-SD-NEXT: usubw2 v0.4s, v1.4s, v0.8h
+; CHECK-SD-NEXT: abs v0.4s, v0.4s
+; CHECK-SD-NEXT: abs v1.4s, v2.4s
+; CHECK-SD-NEXT: uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abdu_const_lhs:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v1.4s, #1
+; CHECK-GI-NEXT: usubw v2.4s, v1.4s, v0.4h
+; CHECK-GI-NEXT: usubw2 v0.4s, v1.4s, v0.8h
+; CHECK-GI-NEXT: abs v1.4s, v2.4s
+; CHECK-GI-NEXT: abs v0.4s, v0.4s
+; CHECK-GI-NEXT: uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT: ret
%zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
%sub = sub <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, %zextsrc1
%abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
@@ -51,16 +81,27 @@ define <8 x i16> @abdu_const_lhs(<8 x i16> %src1) {
}
define <8 x i16> @abdu_const_zero(<8 x i16> %src1) {
-; CHECK-LABEL: abdu_const_zero:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v1.2d, #0000000000000000
-; CHECK-NEXT: ushll v2.4s, v0.4h, #0
-; CHECK-NEXT: usubw2 v0.4s, v1.4s, v0.8h
-; CHECK-NEXT: neg v1.4s, v2.4s
-; CHECK-NEXT: abs v0.4s, v0.4s
-; CHECK-NEXT: abs v1.4s, v1.4s
-; CHECK-NEXT: uzp1 v0.8h, v1.8h, v0.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abdu_const_zero:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v1.2d, #0000000000000000
+; CHECK-SD-NEXT: ushll v2.4s, v0.4h, #0
+; CHECK-SD-NEXT: usubw2 v0.4s, v1.4s, v0.8h
+; CHECK-SD-NEXT: neg v1.4s, v2.4s
+; CHECK-SD-NEXT: abs v0.4s, v0.4s
+; CHECK-SD-NEXT: abs v1.4s, v1.4s
+; CHECK-SD-NEXT: uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abdu_const_zero:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v1.2d, #0000000000000000
+; CHECK-GI-NEXT: ushll v2.4s, v0.4h, #0
+; CHECK-GI-NEXT: neg v2.4s, v2.4s
+; CHECK-GI-NEXT: usubw2 v0.4s, v1.4s, v0.8h
+; CHECK-GI-NEXT: abs v1.4s, v2.4s
+; CHECK-GI-NEXT: abs v0.4s, v0.4s
+; CHECK-GI-NEXT: uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT: ret
%zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
%sub = sub <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, %zextsrc1
%abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
@@ -69,10 +110,17 @@ define <8 x i16> @abdu_const_zero(<8 x i16> %src1) {
}
define <8 x i16> @abdu_const_both() {
-; CHECK-LABEL: abdu_const_both:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v0.8h, #2
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abdu_const_both:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v0.8h, #2
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abdu_const_both:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v0.4s, #2
+; CHECK-GI-NEXT: abs v0.4s, v0.4s
+; CHECK-GI-NEXT: uzp1 v0.8h, v0.8h, v0.8h
+; CHECK-GI-NEXT: ret
%sub = sub <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
%result = trunc <8 x i32> %abs to <8 x i16>
@@ -80,10 +128,16 @@ define <8 x i16> @abdu_const_both() {
}
define <8 x i16> @abdu_const_bothhigh() {
-; CHECK-LABEL: abdu_const_bothhigh:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v0.8h, #1
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abdu_const_bothhigh:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v0.8h, #1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abdu_const_bothhigh:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mvni v0.8h, #1
+; CHECK-GI-NEXT: uabd v0.8h, v0.8h, v0.8h
+; CHECK-GI-NEXT: ret
%zextsrc1 = zext <8 x i16> <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534> to <8 x i32>
%zextsrc2 = zext <8 x i16> <i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535> to <8 x i32>
%sub = sub <8 x i32> %zextsrc1, %zextsrc2
@@ -93,9 +147,14 @@ define <8 x i16> @abdu_const_bothhigh() {
}
define <8 x i16> @abdu_undef(<8 x i16> %src1) {
-; CHECK-LABEL: abdu_undef:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abdu_undef:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abdu_undef:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: uabd v0.8h, v0.8h, v0.8h
+; CHECK-GI-NEXT: ret
%zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
%zextsrc2 = zext <8 x i16> undef to <8 x i32>
%sub = sub <8 x i32> %zextsrc1, %zextsrc2
@@ -105,10 +164,18 @@ define <8 x i16> @abdu_undef(<8 x i16> %src1) {
}
define <8 x i16> @abdu_ugt(<8 x i16>, <8 x i16>) {
-; CHECK-LABEL: abdu_ugt:
-; CHECK: // %bb.0:
-; CHECK-NEXT: uabd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abdu_ugt:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: uabd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abdu_ugt:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: cmhi v2.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: sub v3.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: sub v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT: bit v0.16b, v3.16b, v2.16b
+; CHECK-GI-NEXT: ret
%3 = icmp ugt <8 x i16> %0, %1
%4 = sub <8 x i16> %0, %1
%5 = sub <8 x i16> %1, %0
@@ -117,10 +184,18 @@ define <8 x i16> @abdu_ugt(<8 x i16>, <8 x i16>) {
}
define <8 x i16> @abdu_uge(<8 x i16>, <8 x i16>) {
-; CHECK-LABEL: abdu_uge:
-; CHECK: // %bb.0:
-; CHECK-NEXT: uabd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abdu_uge:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: uabd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abdu_uge:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: cmhs v2.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: sub v3.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: sub v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT: bit v0.16b, v3.16b, v2.16b
+; CHECK-GI-NEXT: ret
%3 = icmp uge <8 x i16> %0, %1
%4 = sub <8 x i16> %0, %1
%5 = sub <8 x i16> %1, %0
@@ -141,10 +216,18 @@ define <8 x i16> @abdu_ult(<8 x i16>, <8 x i16>) {
}
define <8 x i16> @abdu_ule(<8 x i16>, <8 x i16>) {
-; CHECK-LABEL: abdu_ule:
-; CHECK: // %bb.0:
-; CHECK-NEXT: uabd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abdu_ule:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: uabd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abdu_ule:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: cmhs v2.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT: sub v3.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: sub v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT: bif v0.16b, v3.16b, v2.16b
+; CHECK-GI-NEXT: ret
%3 = icmp ule <8 x i16> %0, %1
%4 = sub <8 x i16> %0, %1
%5 = sub <8 x i16> %1, %0
@@ -153,10 +236,18 @@ define <8 x i16> @abdu_ule(<8 x i16>, <8 x i16>) {
}
define <8 x i16> @abds_sgt(<8 x i16>, <8 x i16>) {
-; CHECK-LABEL: abds_sgt:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sabd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abds_sgt:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sabd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abds_sgt:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: cmgt v2.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: sub v3.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: sub v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT: bit v0.16b, v3.16b, v2.16b
+; CHECK-GI-NEXT: ret
%3 = icmp sgt <8 x i16> %0, %1
%4 = sub <8 x i16> %0, %1
%5 = sub <8 x i16> %1, %0
@@ -165,10 +256,18 @@ define <8 x i16> @abds_sgt(<8 x i16>, <8 x i16>) {
}
define <8 x i16> @abds_sge(<8 x i16>, <8 x i16>) {
-; CHECK-LABEL: abds_sge:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sabd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abds_sge:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sabd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abds_sge:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: cmge v2.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: sub v3.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: sub v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT: bit v0.16b, v3.16b, v2.16b
+; CHECK-GI-NEXT: ret
%3 = icmp sge <8 x i16> %0, %1
%4 = sub <8 x i16> %0, %1
%5 = sub <8 x i16> %1, %0
@@ -189,10 +288,18 @@ define <8 x i16> @abds_slt(<8 x i16>, <8 x i16>) {
}
define <8 x i16> @abds_sle(<8 x i16>, <8 x i16>) {
-; CHECK-LABEL: abds_sle:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sabd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abds_sle:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sabd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abds_sle:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: cmge v2.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT: sub v3.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: sub v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT: bif v0.16b, v3.16b, v2.16b
+; CHECK-GI-NEXT: ret
%3 = icmp sle <8 x i16> %0, %1
%4 = sub <8 x i16> %0, %1
%5 = sub <8 x i16> %1, %0
@@ -221,75 +328,128 @@ define <8 x i16> @abdu_i_const(<8 x i16> %src1) {
}
define <8 x i16> @abdu_i_const_lhs(<8 x i16> %src1) {
-; CHECK-LABEL: abdu_i_const_lhs:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v1.8h, #1
-; CHECK-NEXT: uabd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abdu_i_const_lhs:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v1.8h, #1
+; CHECK-SD-NEXT: uabd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abdu_i_const_lhs:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v1.8h, #1
+; CHECK-GI-NEXT: uabd v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT: ret
%result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <8 x i16> %src1)
ret <8 x i16> %result
}
define <8 x i16> @abdu_i_const_zero(float %t, <8 x i16> %src1) {
-; CHECK-LABEL: abdu_i_const_zero:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov v0.16b, v1.16b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abdu_i_const_zero:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: mov v0.16b, v1.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abdu_i_const_zero:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v0.2d, #0000000000000000
+; CHECK-GI-NEXT: uabd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: ret
%result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <8 x i16> %src1)
ret <8 x i16> %result
}
define <8 x i16> @abdu_i_const_both() {
-; CHECK-LABEL: abdu_i_const_both:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v0.8h, #2
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abdu_i_const_both:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v0.8h, #2
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abdu_i_const_both:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v0.8h, #3
+; CHECK-GI-NEXT: movi v1.8h, #1
+; CHECK-GI-NEXT: uabd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: ret
%result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
ret <8 x i16> %result
}
define <8 x i16> @abdu_i_const_bothhigh() {
-; CHECK-LABEL: abdu_i_const_bothhigh:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v0.8h, #1
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abdu_i_const_bothhigh:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v0.8h, #1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abdu_i_const_bothhigh:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff
+; CHECK-GI-NEXT: mvni v1.8h, #1
+; CHECK-GI-NEXT: uabd v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT: ret
%result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534>, <8 x i16> <i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535>)
ret <8 x i16> %result
}
define <8 x i16> @abdu_i_const_onehigh() {
-; CHECK-LABEL: abdu_i_const_onehigh:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #32765 // =0x7ffd
-; CHECK-NEXT: dup v0.8h, w8
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abdu_i_const_onehigh:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: mov w8, #32765 // =0x7ffd
+; CHECK-SD-NEXT: dup v0.8h, w8
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abdu_i_const_onehigh:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v0.8h, #1
+; CHECK-GI-NEXT: adrp x8, .LCPI21_0
+; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI21_0]
+; CHECK-GI-NEXT: uabd v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT: ret
%result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
ret <8 x i16> %result
}
define <8 x i16> @abdu_i_const_oneneg() {
-; CHECK-LABEL: abdu_i_const_oneneg:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v0.8h, #128, lsl #8
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abdu_i_const_oneneg:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v0.8h, #128, lsl #8
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abdu_i_const_oneneg:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: adrp x8, .LCPI22_0
+; CHECK-GI-NEXT: mvni v0.8h, #1
+; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI22_0]
+; CHECK-GI-NEXT: uabd v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT: ret
%result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>, <8 x i16> <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534>)
ret <8 x i16> %result
}
define <8 x i16> @abdu_i_zero(<8 x i16> %t, <8 x i16> %src1) {
-; CHECK-LABEL: abdu_i_zero:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov v0.16b, v1.16b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abdu_i_zero:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: mov v0.16b, v1.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abdu_i_zero:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v0.2d, #0000000000000000
+; CHECK-GI-NEXT: uabd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: ret
%result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <8 x i16> %src1)
ret <8 x i16> %result
}
define <8 x i16> @abdu_i_undef(<8 x i16> %t, <8 x i16> %src1) {
-; CHECK-LABEL: abdu_i_undef:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v0.2d, #0000000000000000
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abdu_i_undef:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v0.2d, #0000000000000000
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abdu_i_undef:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: uabd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: ret
%result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> undef, <8 x i16> %src1)
ret <8 x i16> %result
}
@@ -312,10 +472,15 @@ define <8 x i16> @abdu_i_reassoc(<8 x i16> %src1) {
define <8 x i16> @abds_base(<8 x i16> %src1, <8 x i16> %src2) {
-; CHECK-LABEL: abds_base:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sabd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abds_base:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sabd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abds_base:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sabd v0.8h, v0.8h, v0.8h
+; CHECK-GI-NEXT: ret
%zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
%zextsrc2 = sext <8 x i16> %src2 to <8 x i32>
%sub = sub <8 x i32> %zextsrc1, %zextsrc2
@@ -325,17 +490,29 @@ define <8 x i16> @abds_base(<8 x i16> %src1, <8 x i16> %src2) {
}
define <8 x i16> @abds_const(<8 x i16> %src1) {
-; CHECK-LABEL: abds_const:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v1.4s, #1
-; CHECK-NEXT: sshll2 v2.4s, v0.8h, #0
-; CHECK-NEXT: sshll v0.4s, v0.4h, #0
-; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: sub v1.4s, v2.4s, v1.4s
-; CHECK-NEXT: abs v1.4s, v1.4s
-; CHECK-NEXT: abs v0.4s, v0.4s
-; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abds_const:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v1.4s, #1
+; CHECK-SD-NEXT: sshll2 v2.4s, v0.8h, #0
+; CHECK-SD-NEXT: sshll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT: sub v0.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT: sub v1.4s, v2.4s, v1.4s
+; CHECK-SD-NEXT: abs v1.4s, v1.4s
+; CHECK-SD-NEXT: abs v0.4s, v0.4s
+; CHECK-SD-NEXT: uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abds_const:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v1.4s, #1
+; CHECK-GI-NEXT: sshll v2.4s, v0.4h, #0
+; CHECK-GI-NEXT: sshll2 v0.4s, v0.8h, #0
+; CHECK-GI-NEXT: sub v2.4s, v2.4s, v1.4s
+; CHECK-GI-NEXT: sub v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT: abs v1.4s, v2.4s
+; CHECK-GI-NEXT: abs v0.4s, v0.4s
+; CHECK-GI-NEXT: uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT: ret
%zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
%sub = sub <8 x i32> %zextsrc1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
@@ -344,15 +521,25 @@ define <8 x i16> @abds_const(<8 x i16> %src1) {
}
define <8 x i16> @abds_const_lhs(<8 x i16> %src1) {
-; CHECK-LABEL: abds_const_lhs:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v1.4s, #1
-; CHECK-NEXT: ssubw v2.4s, v1.4s, v0.4h
-; CHECK-NEXT: ssubw2 v0.4s, v1.4s, v0.8h
-; CHECK-NEXT: abs v0.4s, v0.4s
-; CHECK-NEXT: abs v1.4s, v2.4s
-; CHECK-NEXT: uzp1 v0.8h, v1.8h, v0.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abds_const_lhs:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v1.4s, #1
+; CHECK-SD-NEXT: ssubw v2.4s, v1.4s, v0.4h
+; CHECK-SD-NEXT: ssubw2 v0.4s, v1.4s, v0.8h
+; CHECK-SD-NEXT: abs v0.4s, v0.4s
+; CHECK-SD-NEXT: abs v1.4s, v2.4s
+; CHECK-SD-NEXT: uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abds_const_lhs:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v1.4s, #1
+; CHECK-GI-NEXT: ssubw v2.4s, v1.4s, v0.4h
+; CHECK-GI-NEXT: ssubw2 v0.4s, v1.4s, v0.8h
+; CHECK-GI-NEXT: abs v1.4s, v2.4s
+; CHECK-GI-NEXT: abs v0.4s, v0.4s
+; CHECK-GI-NEXT: uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT: ret
%zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
%sub = sub <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, %zextsrc1
%abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
@@ -361,16 +548,27 @@ define <8 x i16> @abds_const_lhs(<8 x i16> %src1) {
}
define <8 x i16> @abds_const_zero(<8 x i16> %src1) {
-; CHECK-LABEL: abds_const_zero:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v1.2d, #0000000000000000
-; CHECK-NEXT: sshll v2.4s, v0.4h, #0
-; CHECK-NEXT: ssubw2 v0.4s, v1.4s, v0.8h
-; CHECK-NEXT: neg v1.4s, v2.4s
-; CHECK-NEXT: abs v0.4s, v0.4s
-; CHECK-NEXT: abs v1.4s, v1.4s
-; CHECK-NEXT: uzp1 v0.8h, v1.8h, v0.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abds_const_zero:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v1.2d, #0000000000000000
+; CHECK-SD-NEXT: sshll v2.4s, v0.4h, #0
+; CHECK-SD-NEXT: ssubw2 v0.4s, v1.4s, v0.8h
+; CHECK-SD-NEXT: neg v1.4s, v2.4s
+; CHECK-SD-NEXT: abs v0.4s, v0.4s
+; CHECK-SD-NEXT: abs v1.4s, v1.4s
+; CHECK-SD-NEXT: uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abds_const_zero:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v1.2d, #0000000000000000
+; CHECK-GI-NEXT: sshll v2.4s, v0.4h, #0
+; CHECK-GI-NEXT: neg v2.4s, v2.4s
+; CHECK-GI-NEXT: ssubw2 v0.4s, v1.4s, v0.8h
+; CHECK-GI-NEXT: abs v1.4s, v2.4s
+; CHECK-GI-NEXT: abs v0.4s, v0.4s
+; CHECK-GI-NEXT: uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT: ret
%zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
%sub = sub <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, %zextsrc1
%abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
@@ -379,10 +577,17 @@ define <8 x i16> @abds_const_zero(<8 x i16> %src1) {
}
define <8 x i16> @abds_const_both() {
-; CHECK-LABEL: abds_const_both:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v0.8h, #2
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abds_const_both:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v0.8h, #2
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abds_const_both:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v0.4s, #2
+; CHECK-GI-NEXT: abs v0.4s, v0.4s
+; CHECK-GI-NEXT: uzp1 v0.8h, v0.8h, v0.8h
+; CHECK-GI-NEXT: ret
%sub = sub <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
%result = trunc <8 x i32> %abs to <8 x i16>
@@ -390,10 +595,16 @@ define <8 x i16> @abds_const_both() {
}
define <8 x i16> @abds_const_bothhigh() {
-; CHECK-LABEL: abds_const_bothhigh:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v0.8h, #1
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abds_const_bothhigh:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v0.8h, #1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abds_const_bothhigh:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mvni v0.8h, #1
+; CHECK-GI-NEXT: sabd v0.8h, v0.8h, v0.8h
+; CHECK-GI-NEXT: ret
%zextsrc1 = sext <8 x i16> <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534> to <8 x i32>
%zextsrc2 = sext <8 x i16> <i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535> to <8 x i32>
%sub = sub <8 x i32> %zextsrc1, %zextsrc2
@@ -403,14 +614,19 @@ define <8 x i16> @abds_const_bothhigh() {
}
define <8 x i16> @abds_undef(<8 x i16> %src1) {
-; CHECK-LABEL: abds_undef:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sshll v1.4s, v0.4h, #0
-; CHECK-NEXT: sshll2 v0.4s, v0.8h, #0
-; CHECK-NEXT: abs v0.4s, v0.4s
-; CHECK-NEXT: abs v1.4s, v1.4s
-; CHECK-NEXT: uzp1 v0.8h, v1.8h, v0.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abds_undef:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sshll v1.4s, v0.4h, #0
+; CHECK-SD-NEXT: sshll2 v0.4s, v0.8h, #0
+; CHECK-SD-NEXT: abs v0.4s, v0.4s
+; CHECK-SD-NEXT: abs v1.4s, v1.4s
+; CHECK-SD-NEXT: uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abds_undef:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sabd v0.8h, v0.8h, v0.8h
+; CHECK-GI-NEXT: ret
%zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
%zextsrc2 = sext <8 x i16> undef to <8 x i32>
%sub = sub <8 x i32> %zextsrc1, %zextsrc2
@@ -441,75 +657,129 @@ define <8 x i16> @abds_i_const(<8 x i16> %src1) {
}
define <8 x i16> @abds_i_const_lhs(<8 x i16> %src1) {
-; CHECK-LABEL: abds_i_const_lhs:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v1.8h, #1
-; CHECK-NEXT: sabd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abds_i_const_lhs:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v1.8h, #1
+; CHECK-SD-NEXT: sabd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abds_i_const_lhs:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v1.8h, #1
+; CHECK-GI-NEXT: sabd v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT: ret
%result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <8 x i16> %src1)
ret <8 x i16> %result
}
define <8 x i16> @abds_i_const_zero(<8 x i16> %src1) {
-; CHECK-LABEL: abds_i_const_zero:
-; CHECK: // %bb.0:
-; CHECK-NEXT: abs v0.8h, v0.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abds_i_const_zero:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: abs v0.8h, v0.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abds_i_const_zero:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v1.2d, #0000000000000000
+; CHECK-GI-NEXT: sabd v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT: ret
%result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <8 x i16> %src1)
ret <8 x i16> %result
}
define <8 x i16> @abds_i_const_both() {
-; CHECK-LABEL: abds_i_const_both:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v0.8h, #2
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abds_i_const_both:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v0.8h, #2
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abds_i_const_both:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v0.8h, #3
+; CHECK-GI-NEXT: movi v1.8h, #1
+; CHECK-GI-NEXT: sabd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: ret
%result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
ret <8 x i16> %result
}
define <8 x i16> @abds_i_const_bothhigh() {
-; CHECK-LABEL: abds_i_const_bothhigh:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v0.8h, #1
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abds_i_const_bothhigh:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v0.8h, #1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abds_i_const_bothhigh:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: adrp x8, .LCPI38_0
+; CHECK-GI-NEXT: mvni v0.8h, #128, lsl #8
+; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI38_0]
+; CHECK-GI-NEXT: sabd v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT: ret
%result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>, <8 x i16> <i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767>)
ret <8 x i16> %result
}
define <8 x i16> @abds_i_const_onehigh() {
-; CHECK-LABEL: abds_i_const_onehigh:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #32765 // =0x7ffd
-; CHECK-NEXT: dup v0.8h, w8
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abds_i_const_onehigh:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: mov w8, #32765 // =0x7ffd
+; CHECK-SD-NEXT: dup v0.8h, w8
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abds_i_const_onehigh:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v0.8h, #1
+; CHECK-GI-NEXT: adrp x8, .LCPI39_0
+; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI39_0]
+; CHECK-GI-NEXT: sabd v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT: ret
%result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
ret <8 x i16> %result
}
define <8 x i16> @abds_i_const_oneneg() {
-; CHECK-LABEL: abds_i_const_oneneg:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v0.8h, #128, lsl #8
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abds_i_const_oneneg:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v0.8h, #128, lsl #8
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abds_i_const_oneneg:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: adrp x8, .LCPI40_0
+; CHECK-GI-NEXT: mvni v0.8h, #1
+; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI40_0]
+; CHECK-GI-NEXT: sabd v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT: ret
%result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>, <8 x i16> <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534>)
ret <8 x i16> %result
}
define <8 x i16> @abds_i_zero(<8 x i16> %t, <8 x i16> %src1) {
-; CHECK-LABEL: abds_i_zero:
-; CHECK: // %bb.0:
-; CHECK-NEXT: abs v0.8h, v1.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abds_i_zero:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: abs v0.8h, v1.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abds_i_zero:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v0.2d, #0000000000000000
+; CHECK-GI-NEXT: sabd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: ret
%result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <8 x i16> %src1)
ret <8 x i16> %result
}
define <8 x i16> @abds_i_undef(<8 x i16> %t, <8 x i16> %src1) {
-; CHECK-LABEL: abds_i_undef:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v0.2d, #0000000000000000
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: abds_i_undef:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v0.2d, #0000000000000000
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: abds_i_undef:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sabd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: ret
%result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> undef, <8 x i16> %src1)
ret <8 x i16> %result
}
@@ -549,6 +819,29 @@ define <1 x i64> @recursive() {
ret <1 x i64> %10
}
+define <8 x i16> @abds_sub_smax(<8 x i16> %src1, <8 x i16> %src2) {
+; CHECK-LABEL: abds_sub_smax:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sabd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: ret
+ %smax = call <8 x i16> @llvm.smax(<8 x i16> %src1, <8 x i16> %src2)
+ %smin = call <8 x i16> @llvm.smin(<8 x i16> %src1, <8 x i16> %src2)
+ %result = sub <8 x i16> %smax, %smin
+ ret <8 x i16> %result
+}
+
+define <8 x i16> @abdu_sub_umax(<8 x i16> %src1, <8 x i16> %src2) {
+; CHECK-LABEL: abdu_sub_umax:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uabd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: ret
+ %umax = call <8 x i16> @llvm.umax(<8 x i16> %src1, <8 x i16> %src2)
+ %umin = call <8 x i16> @llvm.umin(<8 x i16> %src1, <8 x i16> %src2)
+ %result = sub <8 x i16> %umax, %umin
+ ret <8 x i16> %result
+}
+
+
declare <8 x i8> @llvm.aarch64.neon.umax.v8i8(<8 x i8>, <8 x i8>)
declare <1 x i64> @llvm.aarch64.neon.saddlp.v1i64.v2i32(<2 x i32>)
declare <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8>, <8 x i8>)
>From a5573dfbe82d2b4d7d5dfb2bbe7cadc83dc6e825 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= <schuett at gmail.com>
Date: Thu, 5 Dec 2024 21:15:20 +0100
Subject: [PATCH 2/4] fix clang-format
---
llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index e2b63f1f179bd3..ca8184b60bb6e3 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -943,12 +943,10 @@ class CombinerHelper {
bool matchSuboCarryOut(const MachineInstr &MI, BuildFnTy &MatchInfo);
// trunc(abs(sext(x) - sext(y))) -> abds(x, y)
- bool matchTruncAbds(const MachineInstr &MI,
- BuildFnTy &MatchInfo);
+ bool matchTruncAbds(const MachineInstr &MI, BuildFnTy &MatchInfo);
// trunc(abs(zext(x) - zext(y))) -> abdu(x, y)
- bool matchTruncAbdu(const MachineInstr &MI,
- BuildFnTy &MatchInfo);
+ bool matchTruncAbdu(const MachineInstr &MI, BuildFnTy &MatchInfo);
// select(slt(lhs,rhs),sub(rhs,lhs),sub(lhs,rhs)) -> abds(lhs, rhs)
bool matchSelectAbds(const MachineInstr &MI, BuildFnTy &MatchInfo);
>From 0453ea7f1522dccc06cd4e13dc07d76211c22ba3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= <schuett at gmail.com>
Date: Thu, 5 Dec 2024 21:18:30 +0100
Subject: [PATCH 3/4] fix licensing issue
---
llvm/lib/CodeGen/GlobalISel/CombinerHelperSelect.cpp | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelperSelect.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelperSelect.cpp
index 9b40008e8fb87c..7ad7e8e1f45963 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelperSelect.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelperSelect.cpp
@@ -1,5 +1,4 @@
-//===- CombinerHelperSelect.cpp
-//--------------------------------------------===//
+//===- CombinerHelperSelect.cpp--------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
>From a9f028ef3823a14dbfdbbb0bc2d59d5cd3126497 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= <schuett at gmail.com>
Date: Thu, 5 Dec 2024 22:11:37 +0100
Subject: [PATCH 4/4] fix test
---
llvm/test/CodeGen/AArch64/abd-combine.ll | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/test/CodeGen/AArch64/abd-combine.ll b/llvm/test/CodeGen/AArch64/abd-combine.ll
index 5442bea27441d1..9cf0f5273dd81f 100644
--- a/llvm/test/CodeGen/AArch64/abd-combine.ll
+++ b/llvm/test/CodeGen/AArch64/abd-combine.ll
@@ -2,7 +2,7 @@
; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for recursive
+; CHECK-GI: warning: Instruction selection used fallback path for recursive
define <8 x i16> @abdu_base(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-SD-LABEL: abdu_base:
More information about the llvm-commits mailing list