[llvm] [GlobalISel] Combine into abd[su] and legalize abd[su] (PR #118865)

Thorsten Schütt via llvm-commits llvm-commits at lists.llvm.org
Thu Dec 5 12:18:58 PST 2024


https://github.com/tschuett updated https://github.com/llvm/llvm-project/pull/118865

>From cbda0d749d78057484248123000320440b283ada Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= <schuett at gmail.com>
Date: Sat, 30 Nov 2024 16:42:14 +0100
Subject: [PATCH 1/3] [GlobalISel] Combine into abd[su]

PowerPC, AArch64, and ARM have native abd[su] instructions; the other targets
lower them. We reverse those lowerings as combines.

RISCV lowering:
// abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
// abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))

X86 lowering:
// abds(lhs, rhs) -> select(slt(lhs,rhs),sub(rhs,lhs),sub(lhs,rhs))
// abdu(lhs, rhs) -> select(ult(lhs,rhs),sub(rhs,lhs),sub(lhs,rhs))

// i.e. trunc(abs(sext(Op0) - sext(Op1))) becomes abds(Op0, Op1)
// or trunc(abs(zext(Op0) - zext(Op1))) becomes abdu(Op0, Op1)
---
 .../llvm/CodeGen/GlobalISel/CombinerHelper.h  |  20 +
 .../CodeGen/GlobalISel/GenericMachineInstrs.h |  29 +
 .../include/llvm/Target/GlobalISel/Combine.td |  66 +-
 .../Target/GlobalISel/SelectionDAGCompat.td   |   2 +
 llvm/lib/CodeGen/GlobalISel/CMakeLists.txt    |   1 +
 .../lib/CodeGen/GlobalISel/CombinerHelper.cpp |  46 ++
 .../GlobalISel/CombinerHelperCasts.cpp        |  70 ++
 .../GlobalISel/CombinerHelperSelect.cpp       |  74 ++
 .../AArch64/GISel/AArch64LegalizerInfo.cpp    |   3 +
 llvm/test/CodeGen/AArch64/abd-combine.ll      | 671 +++++++++++++-----
 10 files changed, 792 insertions(+), 190 deletions(-)
 create mode 100644 llvm/lib/CodeGen/GlobalISel/CombinerHelperSelect.cpp

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 55c3b72c8e027f..e2b63f1f179bd3 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -942,6 +942,26 @@ class CombinerHelper {
   // overflow sub
   bool matchSuboCarryOut(const MachineInstr &MI, BuildFnTy &MatchInfo);
 
+  // trunc(abs(sext(x) - sext(y))) -> abds(x, y)
+  bool matchTruncAbds(const MachineInstr &MI,
+                      BuildFnTy &MatchInfo);
+
+  // trunc(abs(zext(x) - zext(y))) -> abdu(x, y)
+  bool matchTruncAbdu(const MachineInstr &MI,
+                      BuildFnTy &MatchInfo);
+
+  // select(slt(lhs,rhs),sub(rhs,lhs),sub(lhs,rhs)) -> abds(lhs, rhs)
+  bool matchSelectAbds(const MachineInstr &MI, BuildFnTy &MatchInfo);
+
+  // select(ult(lhs,rhs),sub(rhs,lhs),sub(lhs,rhs)) -> abdu(lhs, rhs)
+  bool matchSelectAbdu(const MachineInstr &MI, BuildFnTy &MatchInfo);
+
+  // sub(smax(lhs,rhs), smin(lhs,rhs)) -> abds(lhs, rhs)
+  bool matchSubAbds(const MachineInstr &MI, BuildFnTy &MatchInfo);
+
+  // sub(umax(lhs,rhs), umin(lhs,rhs)) -> abdu(lhs, rhs)
+  bool matchSubAbdu(const MachineInstr &MI, BuildFnTy &MatchInfo);
+
 private:
   /// Checks for legality of an indexed variant of \p LdSt.
   bool isIndexedLoadStoreLegal(GLoadStore &LdSt) const;
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
index 9e5d4d34f24d2b..a99db10832c5a9 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
@@ -850,6 +850,16 @@ class GFreeze : public GenericMachineInstr {
   }
 };
 
+/// Represents a G_ABS; getSourceReg() yields the register in operand 1.
+class GAbs : public GenericMachineInstr {
+public:
+  Register getSourceReg() const { return getReg(1); }
+
+  static bool classof(const MachineInstr *MI) {
+    return MI->getOpcode() == TargetOpcode::G_ABS;
+  }
+};
+
 /// Represents a cast operation.
 /// It models the llvm::CastInst concept.
 /// The exception is bitcast.
@@ -1022,6 +1032,25 @@ class GSplatVector : public GenericMachineInstr {
   };
 };
 
+/// Represents an integer min or max: G_SMAX, G_SMIN, G_UMAX or G_UMIN.
+class GMMaxMinOp : public GenericMachineInstr {
+public:
+  /// Register in operand 1.
+  Register getLHSReg() const { return getOperand(1).getReg(); }
+  /// Register in operand 2.
+  Register getRHSReg() const { return getOperand(2).getReg(); }
+
+  /// True exactly for the four integer min/max generic opcodes.
+  static bool classof(const MachineInstr *MI) {
+    const unsigned Opc = MI->getOpcode();
+    const bool IsSigned =
+        Opc == TargetOpcode::G_SMAX || Opc == TargetOpcode::G_SMIN;
+    const bool IsUnsigned =
+        Opc == TargetOpcode::G_UMAX || Opc == TargetOpcode::G_UMIN;
+    return IsSigned || IsUnsigned;
+  }
+};
+
 } // namespace llvm
 
 #endif // LLVM_CODEGEN_GLOBALISEL_GENERICMACHINEINSTRS_H
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index b0c63fc7c7b806..bd24bfb0587211 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1913,6 +1913,70 @@ def overflow_combines: GICombineGroup<[
   match_subo_no_overflow
 ]>;
 
+def trunc_abds : GICombineRule<
+  (defs root:$root, build_fn_matchinfo:$matchinfo),
+  (match (G_SEXT $lhs, $x),
+         (G_SEXT $rhs, $y),
+         (G_SUB $sub, $lhs, $rhs),
+         (G_ABS $abs, $sub),
+         (G_TRUNC $root, $abs):$trunc,
+         [{ return Helper.matchTruncAbds(*${trunc}, ${matchinfo}); }]),
+  (apply [{ Helper.applyBuildFn(*${trunc}, ${matchinfo}); }])>;
+
+def trunc_abdu : GICombineRule<
+  (defs root:$root, build_fn_matchinfo:$matchinfo),
+  (match (G_ZEXT $lhs, $x),
+         (G_ZEXT $rhs, $y),
+         (G_SUB $sub, $lhs, $rhs),
+         (G_ABS $abs, $sub),
+         (G_TRUNC $root, $abs):$trunc,
+         [{ return Helper.matchTruncAbdu(*${trunc}, ${matchinfo}); }]),
+  (apply [{ Helper.applyBuildFn(*${trunc}, ${matchinfo}); }])>;
+
+def select_abds : GICombineRule<
+  (defs root:$root, build_fn_matchinfo:$matchinfo),
+  (match (G_SUB $lhs, $inputr, $inputl),
+         (G_SUB $rhs, $inputl, $inputr),
+         (G_ICMP $cond, $p, $inputl, $inputr),
+         (G_SELECT $root, $cond, $lhs, $rhs):$select,
+         [{ return ${p}.getPredicate() == CmpInst::ICMP_SLT && Helper.matchSelectAbds(*${select}, ${matchinfo}); }]),
+  (apply [{ Helper.applyBuildFn(*${select}, ${matchinfo}); }])>;
+
+def select_abdu : GICombineRule<
+  (defs root:$root, build_fn_matchinfo:$matchinfo),
+  (match (G_SUB $lhs, $inputr, $inputl),
+         (G_SUB $rhs, $inputl, $inputr),
+         (G_ICMP $cond, $p, $inputl, $inputr),
+         (G_SELECT $root, $cond, $lhs, $rhs):$select,
+         [{ return ${p}.getPredicate() == CmpInst::ICMP_ULT && Helper.matchSelectAbdu(*${select}, ${matchinfo}); }]),
+  (apply [{ Helper.applyBuildFn(*${select}, ${matchinfo}); }])>;
+
+def sub_abds : GICombineRule<
+  (defs root:$root, build_fn_matchinfo:$matchinfo),
+  (match (G_SMAX $smax, $inputl, $inputr),
+         (G_SMIN $smin, $inputl, $inputr),
+         (G_SUB $root, $smax, $smin):$sub,
+         [{ return Helper.matchSubAbds(*${sub}, ${matchinfo}); }]),
+  (apply [{ Helper.applyBuildFn(*${sub}, ${matchinfo}); }])>;
+
+def sub_abdu : GICombineRule<
+  (defs root:$root, build_fn_matchinfo:$matchinfo),
+  (match (G_UMAX $umax, $inputl, $inputr),
+         (G_UMIN $umin, $inputl, $inputr),
+         (G_SUB $root, $umax, $umin):$sub,
+         [{ return Helper.matchSubAbdu(*${sub}, ${matchinfo}); }]),
+  (apply [{ Helper.applyBuildFn(*${sub}, ${matchinfo}); }])>;
+
+def abd_su_combines: GICombineGroup<[
+  trunc_abds,
+  trunc_abdu,
+  select_abds,
+  select_abdu,
+  sub_abds,
+  sub_abdu
+]>;
+
+
 // FIXME: These should use the custom predicate feature once it lands.
 def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
                                      undef_to_negative_one,
@@ -1979,7 +2043,7 @@ def shuffle_combines : GICombineGroup<[combine_shuffle_concat,
 def all_combines : GICombineGroup<[integer_reassoc_combines, trivial_combines,
     vector_ops_combines, freeze_combines, cast_combines,
     insert_vec_elt_combines, extract_vec_elt_combines, combines_for_extload,
-    combine_extracted_vector_load,
+    combine_extracted_vector_load, abd_su_combines,
     undef_combines, identity_combines, phi_combines,
     simplify_add_to_sub, hoist_logic_op_with_same_opcode_hands, shifts_too_big,
     reassocs, ptr_add_immed_chain, cmp_combines,
diff --git a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
index 2148f5be4c41aa..807d7195b811cb 100644
--- a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
+++ b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
@@ -76,6 +76,8 @@ def : GINodeEquiv<G_XOR, xor>;
 def : GINodeEquiv<G_SHL, shl>;
 def : GINodeEquiv<G_LSHR, srl>;
 def : GINodeEquiv<G_ASHR, sra>;
+def : GINodeEquiv<G_ABDS, abds>;
+def : GINodeEquiv<G_ABDU, abdu>;
 def : GINodeEquiv<G_SADDSAT, saddsat>;
 def : GINodeEquiv<G_UADDSAT, uaddsat>;
 def : GINodeEquiv<G_SSUBSAT, ssubsat>;
diff --git a/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt b/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt
index a45024d120be68..9d9aa7af1e3961 100644
--- a/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt
+++ b/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt
@@ -9,6 +9,7 @@ add_llvm_component_library(LLVMGlobalISel
   CombinerHelperArtifacts.cpp
   CombinerHelperCasts.cpp
   CombinerHelperCompares.cpp
+  CombinerHelperSelect.cpp
   CombinerHelperVectorOps.cpp
   GIMatchTableExecutor.cpp
   GISelChangeObserver.cpp
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index d95fc8cfbcf558..79ce5e92d5dc38 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -7865,3 +7865,49 @@ bool CombinerHelper::matchSuboCarryOut(const MachineInstr &MI,
 
   return false;
 }
+
+// sub(smax(lhs,rhs), smin(lhs,rhs)) -> abds(lhs, rhs)
+// MI is the root G_SUB; on success MatchInfo builds the G_ABDS into its def.
+bool CombinerHelper::matchSubAbds(const MachineInstr &MI,
+                                  BuildFnTy &MatchInfo) {
+  const GSub *Sub = cast<GSub>(&MI);
+  const GMMaxMinOp *LHS = cast<GMMaxMinOp>(MRI.getVRegDef(Sub->getLHSReg()));
+  const GMMaxMinOp *RHS = cast<GMMaxMinOp>(MRI.getVRegDef(Sub->getRHSReg()));
+
+  // Both operands of the sub (the max and the min) must have a single use.
+  if (!MRI.hasOneNonDBGUse(LHS->getReg(0)) ||
+      !MRI.hasOneNonDBGUse(RHS->getReg(0)))
+    return false;
+
+  Register Dst = Sub->getReg(0);
+  if (!isLegalOrBeforeLegalizer({TargetOpcode::G_ABDS, {MRI.getType(Dst)}}))
+    return false;
+
+  MatchInfo = [=](MachineIRBuilder &B) {
+    B.buildAbds(Dst, LHS->getLHSReg(), LHS->getRHSReg());
+  };
+  return true;
+}
+
+// sub(umax(lhs,rhs), umin(lhs,rhs)) -> abdu(lhs, rhs)
+// MI is the root G_SUB; on success MatchInfo builds the G_ABDU into its def.
+bool CombinerHelper::matchSubAbdu(const MachineInstr &MI,
+                                  BuildFnTy &MatchInfo) {
+  const GSub *Sub = cast<GSub>(&MI);
+  const GMMaxMinOp *LHS = cast<GMMaxMinOp>(MRI.getVRegDef(Sub->getLHSReg()));
+  const GMMaxMinOp *RHS = cast<GMMaxMinOp>(MRI.getVRegDef(Sub->getRHSReg()));
+
+  // Both operands of the sub (the max and the min) must have a single use.
+  if (!MRI.hasOneNonDBGUse(LHS->getReg(0)) ||
+      !MRI.hasOneNonDBGUse(RHS->getReg(0)))
+    return false;
+
+  Register Dst = Sub->getReg(0);
+  if (!isLegalOrBeforeLegalizer({TargetOpcode::G_ABDU, {MRI.getType(Dst)}}))
+    return false;
+
+  MatchInfo = [=](MachineIRBuilder &B) {
+    B.buildAbdu(Dst, LHS->getLHSReg(), LHS->getRHSReg());
+  };
+  return true;
+}
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp
index 30557e6a2304e6..16421ea088e9b5 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp
@@ -359,3 +359,73 @@ bool CombinerHelper::matchCastOfInteger(const MachineInstr &CastMI,
     return false;
   }
 }
+
+// trunc(abs(sext(x) - sext(y))) -> abds(x, y)
+// MI is the root G_TRUNC; on success MatchInfo builds the G_ABDS into its def.
+bool CombinerHelper::matchTruncAbds(const MachineInstr &MI,
+                                    BuildFnTy &MatchInfo) {
+  const GTrunc *Trunc = cast<GTrunc>(&MI);
+  const GAbs *Abs = cast<GAbs>(MRI.getVRegDef(Trunc->getSrcReg()));
+  const GSub *Sub = cast<GSub>(MRI.getVRegDef(Abs->getSourceReg()));
+
+  GSext *SextLHS = cast<GSext>(MRI.getVRegDef(Sub->getLHSReg()));
+  GSext *SextRHS = cast<GSext>(MRI.getVRegDef(Sub->getRHSReg()));
+  Register X = SextLHS->getSrcReg();
+  Register Y = SextRHS->getSrcReg();
+
+  Register Dst = Trunc->getReg(0);
+  LLT DstTy = MRI.getType(Dst);
+
+  // x, y and the truncated result must all share one type.
+  if (MRI.getType(X) != MRI.getType(Y) || DstTy != MRI.getType(X))
+    return false;
+
+  // Require a single non-debug use of the abs, the sub and both extends.
+  if (!MRI.hasOneNonDBGUse(Abs->getReg(0)) ||
+      !MRI.hasOneNonDBGUse(Sub->getReg(0)) ||
+      !MRI.hasOneNonDBGUse(Sub->getLHSReg()) ||
+      !MRI.hasOneNonDBGUse(Sub->getRHSReg()))
+    return false;
+
+  if (!isLegalOrBeforeLegalizer({TargetOpcode::G_ABDS, {DstTy}}))
+    return false;
+
+  // Operate on the pre-extension values: x from the LHS, y from the RHS.
+  MatchInfo = [=](MachineIRBuilder &B) { B.buildAbds(Dst, X, Y); };
+  return true;
+}
+
+// trunc(abs(zext(x) - zext(y))) -> abdu(x, y)
+// MI is the root G_TRUNC; on success MatchInfo builds the G_ABDU into its def.
+bool CombinerHelper::matchTruncAbdu(const MachineInstr &MI,
+                                    BuildFnTy &MatchInfo) {
+  const GTrunc *Trunc = cast<GTrunc>(&MI);
+  const GAbs *Abs = cast<GAbs>(MRI.getVRegDef(Trunc->getSrcReg()));
+  const GSub *Sub = cast<GSub>(MRI.getVRegDef(Abs->getSourceReg()));
+
+  GZext *ZextLHS = cast<GZext>(MRI.getVRegDef(Sub->getLHSReg()));
+  GZext *ZextRHS = cast<GZext>(MRI.getVRegDef(Sub->getRHSReg()));
+  Register X = ZextLHS->getSrcReg();
+  Register Y = ZextRHS->getSrcReg();
+
+  Register Dst = Trunc->getReg(0);
+  LLT DstTy = MRI.getType(Dst);
+
+  // x, y and the truncated result must all share one type.
+  if (MRI.getType(X) != MRI.getType(Y) || DstTy != MRI.getType(X))
+    return false;
+
+  // Require a single non-debug use of the abs, the sub and both extends.
+  if (!MRI.hasOneNonDBGUse(Abs->getReg(0)) ||
+      !MRI.hasOneNonDBGUse(Sub->getReg(0)) ||
+      !MRI.hasOneNonDBGUse(Sub->getLHSReg()) ||
+      !MRI.hasOneNonDBGUse(Sub->getRHSReg()))
+    return false;
+
+  if (!isLegalOrBeforeLegalizer({TargetOpcode::G_ABDU, {DstTy}}))
+    return false;
+
+  // Operate on the pre-extension values: x from the LHS, y from the RHS.
+  MatchInfo = [=](MachineIRBuilder &B) { B.buildAbdu(Dst, X, Y); };
+  return true;
+}
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelperSelect.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelperSelect.cpp
new file mode 100644
index 00000000000000..9b40008e8fb87c
--- /dev/null
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelperSelect.cpp
@@ -0,0 +1,74 @@
+//===- CombinerHelperSelect.cpp
+//--------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements CombinerHelper for G_SELECT.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/LowLevelTypeUtils.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/Support/Casting.h"
+
+#define DEBUG_TYPE "gi-combiner"
+
+using namespace llvm;
+
+// select(slt(lhs,rhs),sub(rhs,lhs),sub(lhs,rhs)) -> abds(lhs, rhs)
+bool CombinerHelper::matchSelectAbds(const MachineInstr &MI,
+                                     BuildFnTy &MatchInfo) {
+  const GSelect *Sel = cast<GSelect>(&MI);
+  GSub *TrueSub = cast<GSub>(MRI.getVRegDef(Sel->getTrueReg()));
+  GSub *FalseSub = cast<GSub>(MRI.getVRegDef(Sel->getFalseReg()));
+  // The condition and both subtractions must have a single non-debug use.
+  const bool AllSingleUse = MRI.hasOneNonDBGUse(Sel->getCondReg()) &&
+                            MRI.hasOneNonDBGUse(TrueSub->getReg(0)) &&
+                            MRI.hasOneNonDBGUse(FalseSub->getReg(0));
+  if (!AllSingleUse)
+    return false;
+
+  Register Dst = Sel->getReg(0);
+  if (!isLegalOrBeforeLegalizer({TargetOpcode::G_ABDS, {MRI.getType(Dst)}}))
+    return false;
+
+  // The false value is sub(lhs,rhs), so its operands are already (lhs, rhs).
+  MatchInfo = [=](MachineIRBuilder &B) {
+    B.buildAbds(Dst, FalseSub->getLHSReg(), FalseSub->getRHSReg());
+  };
+  return true;
+}
+
+// select(ult(lhs,rhs),sub(rhs,lhs),sub(lhs,rhs)) -> abdu(lhs, rhs)
+bool CombinerHelper::matchSelectAbdu(const MachineInstr &MI,
+                                     BuildFnTy &MatchInfo) {
+  const GSelect *Sel = cast<GSelect>(&MI);
+  GSub *TrueSub = cast<GSub>(MRI.getVRegDef(Sel->getTrueReg()));
+  GSub *FalseSub = cast<GSub>(MRI.getVRegDef(Sel->getFalseReg()));
+  // The condition and both subtractions must have a single non-debug use.
+  const bool AllSingleUse = MRI.hasOneNonDBGUse(Sel->getCondReg()) &&
+                            MRI.hasOneNonDBGUse(TrueSub->getReg(0)) &&
+                            MRI.hasOneNonDBGUse(FalseSub->getReg(0));
+  if (!AllSingleUse)
+    return false;
+
+  Register Dst = Sel->getReg(0);
+  if (!isLegalOrBeforeLegalizer({TargetOpcode::G_ABDU, {MRI.getType(Dst)}}))
+    return false;
+
+  // The false value is sub(lhs,rhs), so its operands are already (lhs, rhs).
+  MatchInfo = [=](MachineIRBuilder &B) {
+    B.buildAbdu(Dst, FalseSub->getLHSReg(), FalseSub->getRHSReg());
+  };
+  return true;
+}
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 619a041c273cd8..7c980692f217b1 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -1344,6 +1344,9 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
   getActionDefinitionsBuilder(G_SPLAT_VECTOR)
       .legalFor(HasSVE, {{nxv4s32, s32}, {nxv2s64, s64}});
 
+  getActionDefinitionsBuilder({G_ABDS, G_ABDU})
+      .legalFor({{v8s8}, {v4s16}, {v2s32}, {v16s8}, {v8s16}, {v4s32}});
+
   getLegacyLegalizerInfo().computeTables();
   verify(*ST.getInstrInfo());
 }
diff --git a/llvm/test/CodeGen/AArch64/abd-combine.ll b/llvm/test/CodeGen/AArch64/abd-combine.ll
index e48680f4be98b6..5442bea27441d1 100644
--- a/llvm/test/CodeGen/AArch64/abd-combine.ll
+++ b/llvm/test/CodeGen/AArch64/abd-combine.ll
@@ -1,11 +1,19 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=aarch64 | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+
+; CHECK-GI:       warning: Instruction selection used fallback path for recursive
 
 define <8 x i16> @abdu_base(<8 x i16> %src1, <8 x i16> %src2) {
-; CHECK-LABEL: abdu_base:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    uabd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abdu_base:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    uabd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abdu_base:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    uabd v0.8h, v0.8h, v0.8h
+; CHECK-GI-NEXT:    ret
   %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
   %zextsrc2 = zext <8 x i16> %src2 to <8 x i32>
   %sub = sub <8 x i32> %zextsrc1, %zextsrc2
@@ -15,17 +23,29 @@ define <8 x i16> @abdu_base(<8 x i16> %src1, <8 x i16> %src2) {
 }
 
 define <8 x i16> @abdu_const(<8 x i16> %src1) {
-; CHECK-LABEL: abdu_const:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi v1.4s, #1
-; CHECK-NEXT:    ushll2 v2.4s, v0.8h, #0
-; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
-; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    sub v1.4s, v2.4s, v1.4s
-; CHECK-NEXT:    abs v1.4s, v1.4s
-; CHECK-NEXT:    abs v0.4s, v0.4s
-; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abdu_const:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    movi v1.4s, #1
+; CHECK-SD-NEXT:    ushll2 v2.4s, v0.8h, #0
+; CHECK-SD-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT:    sub v0.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT:    sub v1.4s, v2.4s, v1.4s
+; CHECK-SD-NEXT:    abs v1.4s, v1.4s
+; CHECK-SD-NEXT:    abs v0.4s, v0.4s
+; CHECK-SD-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abdu_const:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v1.4s, #1
+; CHECK-GI-NEXT:    ushll v2.4s, v0.4h, #0
+; CHECK-GI-NEXT:    ushll2 v0.4s, v0.8h, #0
+; CHECK-GI-NEXT:    sub v2.4s, v2.4s, v1.4s
+; CHECK-GI-NEXT:    sub v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT:    abs v1.4s, v2.4s
+; CHECK-GI-NEXT:    abs v0.4s, v0.4s
+; CHECK-GI-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT:    ret
   %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
   %sub = sub <8 x i32> %zextsrc1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
   %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
@@ -34,15 +54,25 @@ define <8 x i16> @abdu_const(<8 x i16> %src1) {
 }
 
 define <8 x i16> @abdu_const_lhs(<8 x i16> %src1) {
-; CHECK-LABEL: abdu_const_lhs:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi v1.4s, #1
-; CHECK-NEXT:    usubw v2.4s, v1.4s, v0.4h
-; CHECK-NEXT:    usubw2 v0.4s, v1.4s, v0.8h
-; CHECK-NEXT:    abs v0.4s, v0.4s
-; CHECK-NEXT:    abs v1.4s, v2.4s
-; CHECK-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abdu_const_lhs:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    movi v1.4s, #1
+; CHECK-SD-NEXT:    usubw v2.4s, v1.4s, v0.4h
+; CHECK-SD-NEXT:    usubw2 v0.4s, v1.4s, v0.8h
+; CHECK-SD-NEXT:    abs v0.4s, v0.4s
+; CHECK-SD-NEXT:    abs v1.4s, v2.4s
+; CHECK-SD-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abdu_const_lhs:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v1.4s, #1
+; CHECK-GI-NEXT:    usubw v2.4s, v1.4s, v0.4h
+; CHECK-GI-NEXT:    usubw2 v0.4s, v1.4s, v0.8h
+; CHECK-GI-NEXT:    abs v1.4s, v2.4s
+; CHECK-GI-NEXT:    abs v0.4s, v0.4s
+; CHECK-GI-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT:    ret
   %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
   %sub = sub <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, %zextsrc1
   %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
@@ -51,16 +81,27 @@ define <8 x i16> @abdu_const_lhs(<8 x i16> %src1) {
 }
 
 define <8 x i16> @abdu_const_zero(<8 x i16> %src1) {
-; CHECK-LABEL: abdu_const_zero:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi v1.2d, #0000000000000000
-; CHECK-NEXT:    ushll v2.4s, v0.4h, #0
-; CHECK-NEXT:    usubw2 v0.4s, v1.4s, v0.8h
-; CHECK-NEXT:    neg v1.4s, v2.4s
-; CHECK-NEXT:    abs v0.4s, v0.4s
-; CHECK-NEXT:    abs v1.4s, v1.4s
-; CHECK-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abdu_const_zero:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-SD-NEXT:    ushll v2.4s, v0.4h, #0
+; CHECK-SD-NEXT:    usubw2 v0.4s, v1.4s, v0.8h
+; CHECK-SD-NEXT:    neg v1.4s, v2.4s
+; CHECK-SD-NEXT:    abs v0.4s, v0.4s
+; CHECK-SD-NEXT:    abs v1.4s, v1.4s
+; CHECK-SD-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abdu_const_zero:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-GI-NEXT:    ushll v2.4s, v0.4h, #0
+; CHECK-GI-NEXT:    neg v2.4s, v2.4s
+; CHECK-GI-NEXT:    usubw2 v0.4s, v1.4s, v0.8h
+; CHECK-GI-NEXT:    abs v1.4s, v2.4s
+; CHECK-GI-NEXT:    abs v0.4s, v0.4s
+; CHECK-GI-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT:    ret
   %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
   %sub = sub <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, %zextsrc1
   %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
@@ -69,10 +110,17 @@ define <8 x i16> @abdu_const_zero(<8 x i16> %src1) {
 }
 
 define <8 x i16> @abdu_const_both() {
-; CHECK-LABEL: abdu_const_both:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi v0.8h, #2
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abdu_const_both:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    movi v0.8h, #2
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abdu_const_both:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v0.4s, #2
+; CHECK-GI-NEXT:    abs v0.4s, v0.4s
+; CHECK-GI-NEXT:    uzp1 v0.8h, v0.8h, v0.8h
+; CHECK-GI-NEXT:    ret
   %sub = sub <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
   %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
   %result = trunc <8 x i32> %abs to <8 x i16>
@@ -80,10 +128,16 @@ define <8 x i16> @abdu_const_both() {
 }
 
 define <8 x i16> @abdu_const_bothhigh() {
-; CHECK-LABEL: abdu_const_bothhigh:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi v0.8h, #1
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abdu_const_bothhigh:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    movi v0.8h, #1
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abdu_const_bothhigh:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    mvni v0.8h, #1
+; CHECK-GI-NEXT:    uabd v0.8h, v0.8h, v0.8h
+; CHECK-GI-NEXT:    ret
   %zextsrc1 = zext <8 x i16> <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534> to <8 x i32>
   %zextsrc2 = zext <8 x i16> <i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535> to <8 x i32>
   %sub = sub <8 x i32> %zextsrc1, %zextsrc2
@@ -93,9 +147,14 @@ define <8 x i16> @abdu_const_bothhigh() {
 }
 
 define <8 x i16> @abdu_undef(<8 x i16> %src1) {
-; CHECK-LABEL: abdu_undef:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abdu_undef:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abdu_undef:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    uabd v0.8h, v0.8h, v0.8h
+; CHECK-GI-NEXT:    ret
   %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
   %zextsrc2 = zext <8 x i16> undef to <8 x i32>
   %sub = sub <8 x i32> %zextsrc1, %zextsrc2
@@ -105,10 +164,18 @@ define <8 x i16> @abdu_undef(<8 x i16> %src1) {
 }
 
 define <8 x i16> @abdu_ugt(<8 x i16>, <8 x i16>) {
-; CHECK-LABEL: abdu_ugt:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    uabd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abdu_ugt:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    uabd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abdu_ugt:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    cmhi v2.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    sub v3.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    sub v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT:    bit v0.16b, v3.16b, v2.16b
+; CHECK-GI-NEXT:    ret
   %3 = icmp ugt <8 x i16> %0, %1
   %4 = sub <8 x i16> %0, %1
   %5 = sub <8 x i16> %1, %0
@@ -117,10 +184,18 @@ define <8 x i16> @abdu_ugt(<8 x i16>, <8 x i16>) {
 }
 
 define <8 x i16> @abdu_uge(<8 x i16>, <8 x i16>) {
-; CHECK-LABEL: abdu_uge:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    uabd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abdu_uge:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    uabd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abdu_uge:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    cmhs v2.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    sub v3.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    sub v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT:    bit v0.16b, v3.16b, v2.16b
+; CHECK-GI-NEXT:    ret
   %3 = icmp uge <8 x i16> %0, %1
   %4 = sub <8 x i16> %0, %1
   %5 = sub <8 x i16> %1, %0
@@ -141,10 +216,18 @@ define <8 x i16> @abdu_ult(<8 x i16>, <8 x i16>) {
 }
 
 define <8 x i16> @abdu_ule(<8 x i16>, <8 x i16>) {
-; CHECK-LABEL: abdu_ule:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    uabd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abdu_ule:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    uabd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abdu_ule:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    cmhs v2.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT:    sub v3.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    sub v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT:    bif v0.16b, v3.16b, v2.16b
+; CHECK-GI-NEXT:    ret
   %3 = icmp ule <8 x i16> %0, %1
   %4 = sub <8 x i16> %0, %1
   %5 = sub <8 x i16> %1, %0
@@ -153,10 +236,18 @@ define <8 x i16> @abdu_ule(<8 x i16>, <8 x i16>) {
 }
 
 define <8 x i16> @abds_sgt(<8 x i16>, <8 x i16>) {
-; CHECK-LABEL: abds_sgt:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    sabd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abds_sgt:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sabd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abds_sgt:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    cmgt v2.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    sub v3.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    sub v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT:    bit v0.16b, v3.16b, v2.16b
+; CHECK-GI-NEXT:    ret
   %3 = icmp sgt <8 x i16> %0, %1
   %4 = sub <8 x i16> %0, %1
   %5 = sub <8 x i16> %1, %0
@@ -165,10 +256,18 @@ define <8 x i16> @abds_sgt(<8 x i16>, <8 x i16>) {
 }
 
 define <8 x i16> @abds_sge(<8 x i16>, <8 x i16>) {
-; CHECK-LABEL: abds_sge:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    sabd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abds_sge:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sabd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abds_sge:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    cmge v2.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    sub v3.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    sub v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT:    bit v0.16b, v3.16b, v2.16b
+; CHECK-GI-NEXT:    ret
   %3 = icmp sge <8 x i16> %0, %1
   %4 = sub <8 x i16> %0, %1
   %5 = sub <8 x i16> %1, %0
@@ -189,10 +288,18 @@ define <8 x i16> @abds_slt(<8 x i16>, <8 x i16>) {
 }
 
 define <8 x i16> @abds_sle(<8 x i16>, <8 x i16>) {
-; CHECK-LABEL: abds_sle:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    sabd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abds_sle:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sabd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abds_sle:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    cmge v2.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT:    sub v3.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    sub v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT:    bif v0.16b, v3.16b, v2.16b
+; CHECK-GI-NEXT:    ret
   %3 = icmp sle <8 x i16> %0, %1
   %4 = sub <8 x i16> %0, %1
   %5 = sub <8 x i16> %1, %0
@@ -221,75 +328,128 @@ define <8 x i16> @abdu_i_const(<8 x i16> %src1) {
 }
 
 define <8 x i16> @abdu_i_const_lhs(<8 x i16> %src1) {
-; CHECK-LABEL: abdu_i_const_lhs:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi v1.8h, #1
-; CHECK-NEXT:    uabd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abdu_i_const_lhs:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    movi v1.8h, #1
+; CHECK-SD-NEXT:    uabd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abdu_i_const_lhs:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v1.8h, #1
+; CHECK-GI-NEXT:    uabd v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT:    ret
   %result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <8 x i16> %src1)
   ret <8 x i16> %result
 }
 
 define <8 x i16> @abdu_i_const_zero(float %t, <8 x i16> %src1) {
-; CHECK-LABEL: abdu_i_const_zero:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov v0.16b, v1.16b
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abdu_i_const_zero:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    mov v0.16b, v1.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abdu_i_const_zero:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-GI-NEXT:    uabd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    ret
   %result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <8 x i16> %src1)
   ret <8 x i16> %result
 }
 
 define <8 x i16> @abdu_i_const_both() {
-; CHECK-LABEL: abdu_i_const_both:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi v0.8h, #2
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abdu_i_const_both:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    movi v0.8h, #2
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abdu_i_const_both:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v0.8h, #3
+; CHECK-GI-NEXT:    movi v1.8h, #1
+; CHECK-GI-NEXT:    uabd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    ret
   %result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
   ret <8 x i16> %result
 }
 
 define <8 x i16> @abdu_i_const_bothhigh() {
-; CHECK-LABEL: abdu_i_const_bothhigh:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi v0.8h, #1
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abdu_i_const_bothhigh:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    movi v0.8h, #1
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abdu_i_const_bothhigh:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v0.2d, #0xffffffffffffffff
+; CHECK-GI-NEXT:    mvni v1.8h, #1
+; CHECK-GI-NEXT:    uabd v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT:    ret
   %result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534>, <8 x i16> <i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535>)
   ret <8 x i16> %result
 }
 
 define <8 x i16> @abdu_i_const_onehigh() {
-; CHECK-LABEL: abdu_i_const_onehigh:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #32765 // =0x7ffd
-; CHECK-NEXT:    dup v0.8h, w8
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abdu_i_const_onehigh:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    mov w8, #32765 // =0x7ffd
+; CHECK-SD-NEXT:    dup v0.8h, w8
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abdu_i_const_onehigh:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v0.8h, #1
+; CHECK-GI-NEXT:    adrp x8, .LCPI21_0
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI21_0]
+; CHECK-GI-NEXT:    uabd v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT:    ret
   %result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
   ret <8 x i16> %result
 }
 
 define <8 x i16> @abdu_i_const_oneneg() {
-; CHECK-LABEL: abdu_i_const_oneneg:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi v0.8h, #128, lsl #8
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abdu_i_const_oneneg:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    movi v0.8h, #128, lsl #8
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abdu_i_const_oneneg:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    adrp x8, .LCPI22_0
+; CHECK-GI-NEXT:    mvni v0.8h, #1
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI22_0]
+; CHECK-GI-NEXT:    uabd v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT:    ret
   %result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>, <8 x i16> <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534>)
   ret <8 x i16> %result
 }
 
 define <8 x i16> @abdu_i_zero(<8 x i16> %t, <8 x i16> %src1) {
-; CHECK-LABEL: abdu_i_zero:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov v0.16b, v1.16b
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abdu_i_zero:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    mov v0.16b, v1.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abdu_i_zero:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-GI-NEXT:    uabd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    ret
   %result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <8 x i16> %src1)
   ret <8 x i16> %result
 }
 
 define <8 x i16> @abdu_i_undef(<8 x i16> %t, <8 x i16> %src1) {
-; CHECK-LABEL: abdu_i_undef:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi v0.2d, #0000000000000000
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abdu_i_undef:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abdu_i_undef:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    uabd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    ret
   %result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> undef, <8 x i16> %src1)
   ret <8 x i16> %result
 }
@@ -312,10 +472,15 @@ define <8 x i16> @abdu_i_reassoc(<8 x i16> %src1) {
 
 
 define <8 x i16> @abds_base(<8 x i16> %src1, <8 x i16> %src2) {
-; CHECK-LABEL: abds_base:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    sabd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abds_base:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sabd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abds_base:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    sabd v0.8h, v0.8h, v0.8h
+; CHECK-GI-NEXT:    ret
   %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
   %zextsrc2 = sext <8 x i16> %src2 to <8 x i32>
   %sub = sub <8 x i32> %zextsrc1, %zextsrc2
@@ -325,17 +490,29 @@ define <8 x i16> @abds_base(<8 x i16> %src1, <8 x i16> %src2) {
 }
 
 define <8 x i16> @abds_const(<8 x i16> %src1) {
-; CHECK-LABEL: abds_const:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi v1.4s, #1
-; CHECK-NEXT:    sshll2 v2.4s, v0.8h, #0
-; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
-; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    sub v1.4s, v2.4s, v1.4s
-; CHECK-NEXT:    abs v1.4s, v1.4s
-; CHECK-NEXT:    abs v0.4s, v0.4s
-; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abds_const:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    movi v1.4s, #1
+; CHECK-SD-NEXT:    sshll2 v2.4s, v0.8h, #0
+; CHECK-SD-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT:    sub v0.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT:    sub v1.4s, v2.4s, v1.4s
+; CHECK-SD-NEXT:    abs v1.4s, v1.4s
+; CHECK-SD-NEXT:    abs v0.4s, v0.4s
+; CHECK-SD-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abds_const:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v1.4s, #1
+; CHECK-GI-NEXT:    sshll v2.4s, v0.4h, #0
+; CHECK-GI-NEXT:    sshll2 v0.4s, v0.8h, #0
+; CHECK-GI-NEXT:    sub v2.4s, v2.4s, v1.4s
+; CHECK-GI-NEXT:    sub v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT:    abs v1.4s, v2.4s
+; CHECK-GI-NEXT:    abs v0.4s, v0.4s
+; CHECK-GI-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT:    ret
   %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
   %sub = sub <8 x i32> %zextsrc1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
   %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
@@ -344,15 +521,25 @@ define <8 x i16> @abds_const(<8 x i16> %src1) {
 }
 
 define <8 x i16> @abds_const_lhs(<8 x i16> %src1) {
-; CHECK-LABEL: abds_const_lhs:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi v1.4s, #1
-; CHECK-NEXT:    ssubw v2.4s, v1.4s, v0.4h
-; CHECK-NEXT:    ssubw2 v0.4s, v1.4s, v0.8h
-; CHECK-NEXT:    abs v0.4s, v0.4s
-; CHECK-NEXT:    abs v1.4s, v2.4s
-; CHECK-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abds_const_lhs:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    movi v1.4s, #1
+; CHECK-SD-NEXT:    ssubw v2.4s, v1.4s, v0.4h
+; CHECK-SD-NEXT:    ssubw2 v0.4s, v1.4s, v0.8h
+; CHECK-SD-NEXT:    abs v0.4s, v0.4s
+; CHECK-SD-NEXT:    abs v1.4s, v2.4s
+; CHECK-SD-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abds_const_lhs:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v1.4s, #1
+; CHECK-GI-NEXT:    ssubw v2.4s, v1.4s, v0.4h
+; CHECK-GI-NEXT:    ssubw2 v0.4s, v1.4s, v0.8h
+; CHECK-GI-NEXT:    abs v1.4s, v2.4s
+; CHECK-GI-NEXT:    abs v0.4s, v0.4s
+; CHECK-GI-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT:    ret
   %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
   %sub = sub <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, %zextsrc1
   %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
@@ -361,16 +548,27 @@ define <8 x i16> @abds_const_lhs(<8 x i16> %src1) {
 }
 
 define <8 x i16> @abds_const_zero(<8 x i16> %src1) {
-; CHECK-LABEL: abds_const_zero:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi v1.2d, #0000000000000000
-; CHECK-NEXT:    sshll v2.4s, v0.4h, #0
-; CHECK-NEXT:    ssubw2 v0.4s, v1.4s, v0.8h
-; CHECK-NEXT:    neg v1.4s, v2.4s
-; CHECK-NEXT:    abs v0.4s, v0.4s
-; CHECK-NEXT:    abs v1.4s, v1.4s
-; CHECK-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abds_const_zero:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-SD-NEXT:    sshll v2.4s, v0.4h, #0
+; CHECK-SD-NEXT:    ssubw2 v0.4s, v1.4s, v0.8h
+; CHECK-SD-NEXT:    neg v1.4s, v2.4s
+; CHECK-SD-NEXT:    abs v0.4s, v0.4s
+; CHECK-SD-NEXT:    abs v1.4s, v1.4s
+; CHECK-SD-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abds_const_zero:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-GI-NEXT:    sshll v2.4s, v0.4h, #0
+; CHECK-GI-NEXT:    neg v2.4s, v2.4s
+; CHECK-GI-NEXT:    ssubw2 v0.4s, v1.4s, v0.8h
+; CHECK-GI-NEXT:    abs v1.4s, v2.4s
+; CHECK-GI-NEXT:    abs v0.4s, v0.4s
+; CHECK-GI-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT:    ret
   %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
   %sub = sub <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, %zextsrc1
   %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
@@ -379,10 +577,17 @@ define <8 x i16> @abds_const_zero(<8 x i16> %src1) {
 }
 
 define <8 x i16> @abds_const_both() {
-; CHECK-LABEL: abds_const_both:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi v0.8h, #2
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abds_const_both:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    movi v0.8h, #2
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abds_const_both:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v0.4s, #2
+; CHECK-GI-NEXT:    abs v0.4s, v0.4s
+; CHECK-GI-NEXT:    uzp1 v0.8h, v0.8h, v0.8h
+; CHECK-GI-NEXT:    ret
   %sub = sub <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
   %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
   %result = trunc <8 x i32> %abs to <8 x i16>
@@ -390,10 +595,16 @@ define <8 x i16> @abds_const_both() {
 }
 
 define <8 x i16> @abds_const_bothhigh() {
-; CHECK-LABEL: abds_const_bothhigh:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi v0.8h, #1
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abds_const_bothhigh:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    movi v0.8h, #1
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abds_const_bothhigh:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    mvni v0.8h, #1
+; CHECK-GI-NEXT:    sabd v0.8h, v0.8h, v0.8h
+; CHECK-GI-NEXT:    ret
   %zextsrc1 = sext <8 x i16> <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534> to <8 x i32>
   %zextsrc2 = sext <8 x i16> <i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535> to <8 x i32>
   %sub = sub <8 x i32> %zextsrc1, %zextsrc2
@@ -403,14 +614,19 @@ define <8 x i16> @abds_const_bothhigh() {
 }
 
 define <8 x i16> @abds_undef(<8 x i16> %src1) {
-; CHECK-LABEL: abds_undef:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    sshll v1.4s, v0.4h, #0
-; CHECK-NEXT:    sshll2 v0.4s, v0.8h, #0
-; CHECK-NEXT:    abs v0.4s, v0.4s
-; CHECK-NEXT:    abs v1.4s, v1.4s
-; CHECK-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abds_undef:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sshll v1.4s, v0.4h, #0
+; CHECK-SD-NEXT:    sshll2 v0.4s, v0.8h, #0
+; CHECK-SD-NEXT:    abs v0.4s, v0.4s
+; CHECK-SD-NEXT:    abs v1.4s, v1.4s
+; CHECK-SD-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abds_undef:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    sabd v0.8h, v0.8h, v0.8h
+; CHECK-GI-NEXT:    ret
   %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
   %zextsrc2 = sext <8 x i16> undef to <8 x i32>
   %sub = sub <8 x i32> %zextsrc1, %zextsrc2
@@ -441,75 +657,129 @@ define <8 x i16> @abds_i_const(<8 x i16> %src1) {
 }
 
 define <8 x i16> @abds_i_const_lhs(<8 x i16> %src1) {
-; CHECK-LABEL: abds_i_const_lhs:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi v1.8h, #1
-; CHECK-NEXT:    sabd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abds_i_const_lhs:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    movi v1.8h, #1
+; CHECK-SD-NEXT:    sabd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abds_i_const_lhs:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v1.8h, #1
+; CHECK-GI-NEXT:    sabd v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT:    ret
   %result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <8 x i16> %src1)
   ret <8 x i16> %result
 }
 
 define <8 x i16> @abds_i_const_zero(<8 x i16> %src1) {
-; CHECK-LABEL: abds_i_const_zero:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    abs v0.8h, v0.8h
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abds_i_const_zero:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    abs v0.8h, v0.8h
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abds_i_const_zero:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-GI-NEXT:    sabd v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT:    ret
   %result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <8 x i16> %src1)
   ret <8 x i16> %result
 }
 
 define <8 x i16> @abds_i_const_both() {
-; CHECK-LABEL: abds_i_const_both:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi v0.8h, #2
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abds_i_const_both:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    movi v0.8h, #2
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abds_i_const_both:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v0.8h, #3
+; CHECK-GI-NEXT:    movi v1.8h, #1
+; CHECK-GI-NEXT:    sabd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    ret
   %result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
   ret <8 x i16> %result
 }
 
 define <8 x i16> @abds_i_const_bothhigh() {
-; CHECK-LABEL: abds_i_const_bothhigh:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi v0.8h, #1
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abds_i_const_bothhigh:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    movi v0.8h, #1
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abds_i_const_bothhigh:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    adrp x8, .LCPI38_0
+; CHECK-GI-NEXT:    mvni v0.8h, #128, lsl #8
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI38_0]
+; CHECK-GI-NEXT:    sabd v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT:    ret
   %result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>, <8 x i16> <i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767>)
   ret <8 x i16> %result
 }
 
 define <8 x i16> @abds_i_const_onehigh() {
-; CHECK-LABEL: abds_i_const_onehigh:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #32765 // =0x7ffd
-; CHECK-NEXT:    dup v0.8h, w8
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abds_i_const_onehigh:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    mov w8, #32765 // =0x7ffd
+; CHECK-SD-NEXT:    dup v0.8h, w8
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abds_i_const_onehigh:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v0.8h, #1
+; CHECK-GI-NEXT:    adrp x8, .LCPI39_0
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI39_0]
+; CHECK-GI-NEXT:    sabd v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT:    ret
   %result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
   ret <8 x i16> %result
 }
 
 define <8 x i16> @abds_i_const_oneneg() {
-; CHECK-LABEL: abds_i_const_oneneg:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi v0.8h, #128, lsl #8
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abds_i_const_oneneg:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    movi v0.8h, #128, lsl #8
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abds_i_const_oneneg:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    adrp x8, .LCPI40_0
+; CHECK-GI-NEXT:    mvni v0.8h, #1
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI40_0]
+; CHECK-GI-NEXT:    sabd v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT:    ret
   %result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>, <8 x i16> <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534>)
   ret <8 x i16> %result
 }
 
 define <8 x i16> @abds_i_zero(<8 x i16> %t, <8 x i16> %src1) {
-; CHECK-LABEL: abds_i_zero:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    abs v0.8h, v1.8h
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abds_i_zero:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    abs v0.8h, v1.8h
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abds_i_zero:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-GI-NEXT:    sabd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    ret
   %result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <8 x i16> %src1)
   ret <8 x i16> %result
 }
 
 define <8 x i16> @abds_i_undef(<8 x i16> %t, <8 x i16> %src1) {
-; CHECK-LABEL: abds_i_undef:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi v0.2d, #0000000000000000
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abds_i_undef:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abds_i_undef:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    sabd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    ret
   %result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> undef, <8 x i16> %src1)
   ret <8 x i16> %result
 }
@@ -549,6 +819,29 @@ define <1 x i64> @recursive() {
   ret <1 x i64> %10
 }
 
+define <8 x i16> @abds_sub_smax(<8 x i16> %src1, <8 x i16> %src2) {
+; CHECK-LABEL: abds_sub_smax:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sabd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    ret
+  %smax = call <8 x i16> @llvm.smax(<8 x i16> %src1, <8 x i16> %src2)
+  %smin = call <8 x i16> @llvm.smin(<8 x i16> %src1, <8 x i16> %src2)
+  %result = sub <8 x i16> %smax, %smin
+  ret <8 x i16> %result
+}
+
+define <8 x i16> @abdu_sub_umax(<8 x i16> %src1, <8 x i16> %src2) {
+; CHECK-LABEL: abdu_sub_umax:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uabd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    ret
+  %umax = call <8 x i16> @llvm.umax(<8 x i16> %src1, <8 x i16> %src2)
+  %umin = call <8 x i16> @llvm.umin(<8 x i16> %src1, <8 x i16> %src2)
+  %result = sub <8 x i16> %umax, %umin
+  ret <8 x i16> %result
+}
+
+
 declare <8 x i8> @llvm.aarch64.neon.umax.v8i8(<8 x i8>, <8 x i8>)
 declare <1 x i64> @llvm.aarch64.neon.saddlp.v1i64.v2i32(<2 x i32>)
 declare <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8>, <8 x i8>)

>From a5573dfbe82d2b4d7d5dfb2bbe7cadc83dc6e825 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= <schuett at gmail.com>
Date: Thu, 5 Dec 2024 21:15:20 +0100
Subject: [PATCH 2/3] fix clang-format

---
 llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index e2b63f1f179bd3..ca8184b60bb6e3 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -943,12 +943,10 @@ class CombinerHelper {
   bool matchSuboCarryOut(const MachineInstr &MI, BuildFnTy &MatchInfo);
 
   // trunc(abs(sext(x) - sext(y))) -> abds(x, y)
-  bool matchTruncAbds(const MachineInstr &MI,
-                      BuildFnTy &MatchInfo);
+  bool matchTruncAbds(const MachineInstr &MI, BuildFnTy &MatchInfo);
 
   // trunc(abs(zext(x) - zext(y))) -> abdu(x, y)
-  bool matchTruncAbdu(const MachineInstr &MI,
-                      BuildFnTy &MatchInfo);
+  bool matchTruncAbdu(const MachineInstr &MI, BuildFnTy &MatchInfo);
 
   // select(slt(lhs,rhs),sub(rhs,lhs),sub(lhs,rhs)) -> abds(lhs, rhs)
   bool matchSelectAbds(const MachineInstr &MI, BuildFnTy &MatchInfo);

>From 0453ea7f1522dccc06cd4e13dc07d76211c22ba3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= <schuett at gmail.com>
Date: Thu, 5 Dec 2024 21:18:30 +0100
Subject: [PATCH 3/3] fix licensing issue

---
 llvm/lib/CodeGen/GlobalISel/CombinerHelperSelect.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelperSelect.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelperSelect.cpp
index 9b40008e8fb87c..7ad7e8e1f45963 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelperSelect.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelperSelect.cpp
@@ -1,5 +1,4 @@
-//===- CombinerHelperSelect.cpp
-//--------------------------------------------===//
+//===- CombinerHelperSelect.cpp--------------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.



More information about the llvm-commits mailing list