[llvm] [GlobalISel] Combine into abd[su] and legalize abd[su] (PR #179437)

Julian Pokrovsky via llvm-commits llvm-commits at lists.llvm.org
Tue Feb 3 03:25:58 PST 2026


https://github.com/raventid created https://github.com/llvm/llvm-project/pull/179437

This patch adds a combine for (sub (max a, b), (min a, b)) -> (abd a, b) and (trunc (abs (sub (sext a), (sext b)))) -> (abds a, b). It also adds the necessary legalizer support for AArch64.

Resolves https://github.com/llvm/llvm-project/issues/118085

>From a7246906efd0e378c8e84eb8bbb0addd685fcb9d Mon Sep 17 00:00:00 2001
From: raventid <juliankul at gmail.com>
Date: Wed, 28 Jan 2026 18:03:20 +0800
Subject: [PATCH] [GlobalISel] Combine into abd[su]
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch adds a combine for (sub (max a, b), (min a, b)) -> (abd a, b) and (trunc (abs (sub (sext a), (sext b)))) -> (abds a, b). It also adds the necessary legalizer support for AArch64.

Co-authored-by: Thorsten Schütt <schuett at gmail.com>
---
 .../llvm/CodeGen/GlobalISel/CombinerHelper.h  |  18 +
 .../CodeGen/GlobalISel/GenericMachineInstrs.h |  29 +
 .../include/llvm/Target/GlobalISel/Combine.td |  68 +-
 llvm/lib/CodeGen/GlobalISel/CMakeLists.txt    |   1 +
 .../lib/CodeGen/GlobalISel/CombinerHelper.cpp |  30 +
 .../GlobalISel/CombinerHelperCasts.cpp        |  54 ++
 .../GlobalISel/CombinerHelperSelect.cpp       |  59 ++
 .../AArch64/GISel/AArch64LegalizerInfo.cpp    |   2 +
 .../GlobalISel/legalizer-info-validation.mir  |  14 +-
 llvm/test/CodeGen/AArch64/abd-combine.ll      | 715 +++++++++++++-----
 10 files changed, 803 insertions(+), 187 deletions(-)
 create mode 100644 llvm/lib/CodeGen/GlobalISel/CombinerHelperSelect.cpp

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 5d4347066a40c..877a242948ccb 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -1050,6 +1050,24 @@ class CombinerHelper {
   // (ctlz (or (shl (xor x, (sra x, bitwidth-1)), 1), 1) -> (ctls x)
   bool matchCtls(MachineInstr &CtlzMI, BuildFnTy &MatchInfo) const;
 
+  // trunc(abs(sext(x) - sext(y))) -> abds(x, y)
+  bool matchTruncAbds(const MachineInstr &MI);
+
+  // trunc(abs(zext(x) - zext(y))) -> abdu(x, y)
+  bool matchTruncAbdu(const MachineInstr &MI);
+
+  // select(slt(lhs,rhs),sub(rhs,lhs),sub(lhs,rhs)) -> abds(lhs, rhs)
+  bool matchSelectAbds(const MachineInstr &MI);
+
+  // select(ult(lhs,rhs),sub(rhs,lhs),sub(lhs,rhs)) -> abdu(lhs, rhs)
+  bool matchSelectAbdu(const MachineInstr &MI);
+
+  // sub(smax(lhs,rhs), smin(lhs,rhs)) -> abds(lhs, rhs)
+  bool matchSubAbds(const MachineInstr &MI);
+
+  // sub(umax(lhs,rhs), umin(lhs,rhs)) -> abdu(lhs, rhs)
+  bool matchSubAbdu(const MachineInstr &MI);
+
 private:
   /// Checks for legality of an indexed variant of \p LdSt.
   bool isIndexedLoadStoreLegal(GLoadStore &LdSt) const;
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
index 5faf57fd06228..85bb6ca76f62e 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
@@ -853,6 +853,16 @@ class GFreeze : public GenericMachineInstr {
   }
 };
 
+/// Represents an abs.
+class GAbs : public GenericMachineInstr {
+public:
+  Register getSourceReg() const { return getOperand(1).getReg(); }
+
+  static bool classof(const MachineInstr *MI) {
+    return MI->getOpcode() == TargetOpcode::G_ABS;
+  }
+};
+
 /// Represents a cast operation.
 /// It models the llvm::CastInst concept.
 /// The exception is bitcast.
@@ -1028,6 +1038,25 @@ class GSplatVector : public GenericMachineInstr {
   };
 };
 
+/// Represents an integer max or min op.
+class GMaxMinOp : public GenericMachineInstr {
+public:
+  Register getLHSReg() const { return getReg(1); }
+  Register getRHSReg() const { return getReg(2); }
+
+  static bool classof(const MachineInstr *MI) {
+    switch (MI->getOpcode()) {
+    case TargetOpcode::G_SMAX:
+    case TargetOpcode::G_SMIN:
+    case TargetOpcode::G_UMAX:
+    case TargetOpcode::G_UMIN:
+      return true;
+    default:
+      return false;
+    }
+  };
+};
+
 } // namespace llvm
 
 #endif // LLVM_CODEGEN_GLOBALISEL_GENERICMACHINEINSTRS_H
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index dae5809a4e052..d1f14bb9b696e 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -2108,6 +2108,72 @@ def overflow_combines: GICombineGroup<[
   match_subo_no_overflow
 ]>;
 
+// trunc(abs(sext(x) - sext(y))) -> abds(x, y)
+def trunc_abds : GICombineRule<
+  (defs root:$root),
+  (match (G_SEXT $lhs, $x),
+         (G_SEXT $rhs, $y),
+         (G_SUB $sub, $lhs, $rhs),
+         (G_ABS $abs, $sub),
+         (G_TRUNC $root, $abs):$trunc,
+         [{ return Helper.matchTruncAbds(*${trunc}); }]),
+  (apply (G_ABDS $root, $x, $y))>;
+
+// trunc(abs(zext(x) - zext(y))) -> abdu(x, y)
+def trunc_abdu : GICombineRule<
+  (defs root:$root),
+  (match (G_ZEXT $lhs, $x),
+         (G_ZEXT $rhs, $y),
+         (G_SUB $sub, $lhs, $rhs),
+         (G_ABS $abs, $sub),
+         (G_TRUNC $root, $abs):$trunc,
+         [{ return Helper.matchTruncAbdu(*${trunc}); }]),
+  (apply (G_ABDU $root, $x, $y))>;
+
+def select_abds : GICombineRule<
+  (defs root:$root),
+  (match (G_SUB $lhs, $inputr, $inputl),
+         (G_SUB $rhs, $inputl, $inputr),
+         (G_ICMP $cond, $p, $inputl, $inputr),
+         (G_SELECT $root, $cond, $lhs, $rhs):$select,
+         [{ return ${p}.getPredicate() == CmpInst::ICMP_SLT && Helper.matchSelectAbds(*${select}); }]),
+  (apply (G_ABDS $root, $inputl, $inputr))>;
+
+def select_abdu : GICombineRule<
+  (defs root:$root),
+  (match (G_SUB $lhs, $inputr, $inputl),
+         (G_SUB $rhs, $inputl, $inputr),
+         (G_ICMP $cond, $p, $inputl, $inputr),
+         (G_SELECT $root, $cond, $lhs, $rhs):$select,
+         [{ return ${p}.getPredicate() == CmpInst::ICMP_ULT && Helper.matchSelectAbdu(*${select}); }]),
+  (apply (G_ABDU $root, $inputl, $inputr))>;
+
+def sub_abds : GICombineRule<
+  (defs root:$root),
+  (match (G_SMAX $smax, $inputl, $inputr),
+         (G_SMIN $smin, $inputl, $inputr),
+         (G_SUB $root, $smax, $smin):$sub,
+         [{ return Helper.matchSubAbds(*${sub}); }]),
+  (apply (G_ABDS $root, $inputl, $inputr))>;
+
+def sub_abdu : GICombineRule<
+  (defs root:$root),
+  (match (G_UMAX $umax, $inputl, $inputr),
+         (G_UMIN $umin, $inputl, $inputr),
+         (G_SUB $root, $umax, $umin):$sub,
+         [{ return Helper.matchSubAbdu(*${sub}); }]),
+  (apply (G_ABDU $root, $inputl, $inputr))>;
+
+def abd_su_combines: GICombineGroup<[
+  trunc_abds,
+  trunc_abdu,
+  select_abds,
+  select_abdu,
+  sub_abds,
+  sub_abdu
+]>;
+
+
 // FIXME: These should use the custom predicate feature once it lands.
 def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
                                      undef_to_negative_one,
@@ -2175,7 +2241,7 @@ def shuffle_combines : GICombineGroup<[combine_shuffle_concat,
 def all_combines : GICombineGroup<[integer_reassoc_combines, trivial_combines,
     vector_ops_combines, freeze_combines, cast_combines,
     insert_vec_elt_combines, extract_vec_elt_combines, combines_for_extload,
-    combine_extracted_vector_load,
+    combine_extracted_vector_load, abd_su_combines,
     undef_combines, identity_combines, phi_combines,
     simplify_add_to_sub, hoist_logic_op_with_same_opcode_hands, shifts_too_big,
     reassocs, ptr_add_immed_chain, cmp_combines,
diff --git a/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt b/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt
index 27b6ea745921a..e3e49321923fd 100644
--- a/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt
+++ b/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt
@@ -9,6 +9,7 @@ add_llvm_component_library(LLVMGlobalISel
   CombinerHelperArtifacts.cpp
   CombinerHelperCasts.cpp
   CombinerHelperCompares.cpp
+  CombinerHelperSelect.cpp
   CombinerHelperVectorOps.cpp
   GIMatchTableExecutor.cpp
   GISelChangeObserver.cpp
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 08696f08284b1..7f304e51aeb7f 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -8598,4 +8598,38 @@ bool CombinerHelper::matchCtls(MachineInstr &CtlzMI,
   };
 
   return true;
+}
+
+// sub(smax(lhs,rhs), smin(lhs,rhs)) -> abds(lhs, rhs)
+bool CombinerHelper::matchSubAbds(const MachineInstr &MI) {
+  const GSub *Sub = cast<GSub>(&MI);
+  const GMaxMinOp *LHS = cast<GMaxMinOp>(MRI.getVRegDef(Sub->getLHSReg()));
+  const GMaxMinOp *RHS = cast<GMaxMinOp>(MRI.getVRegDef(Sub->getRHSReg()));
+
+  // Bail out unless the max and the min each have a single non-debug use.
+  if (!MRI.hasOneNonDBGUse(LHS->getReg(0)) ||
+      !MRI.hasOneNonDBGUse(RHS->getReg(0)))
+    return false;
+
+  Register Dst = Sub->getReg(0);
+  LLT DstTy = MRI.getType(Dst);
+
+  return isLegalOrBeforeLegalizer({TargetOpcode::G_ABDS, {DstTy}});
+}
+
+// sub(umax(lhs,rhs), umin(lhs,rhs)) -> abdu(lhs, rhs)
+bool CombinerHelper::matchSubAbdu(const MachineInstr &MI) {
+  const GSub *Sub = cast<GSub>(&MI);
+  const GMaxMinOp *LHS = cast<GMaxMinOp>(MRI.getVRegDef(Sub->getLHSReg()));
+  const GMaxMinOp *RHS = cast<GMaxMinOp>(MRI.getVRegDef(Sub->getRHSReg()));
+
+  // Bail out unless the max and the min each have a single non-debug use.
+  if (!MRI.hasOneNonDBGUse(LHS->getReg(0)) ||
+      !MRI.hasOneNonDBGUse(RHS->getReg(0)))
+    return false;
+
+  Register Dst = Sub->getReg(0);
+  LLT DstTy = MRI.getType(Dst);
+
+  return isLegalOrBeforeLegalizer({TargetOpcode::G_ABDU, {DstTy}});
 }
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp
index fc7e3ae5b7942..e0b6d826c9697 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp
@@ -412,4 +412,62 @@ bool CombinerHelper::matchRedundantSextInReg(MachineInstr &Root,
   }
 
   return true;
+}
+
+// trunc(abs(sext(x) - sext(y))) -> abds(x, y)
+bool CombinerHelper::matchTruncAbds(const MachineInstr &MI) {
+  const GTrunc *Trunc = cast<GTrunc>(&MI);
+  const GAbs *Abs = cast<GAbs>(MRI.getVRegDef(Trunc->getSrcReg()));
+  const GSub *Sub = cast<GSub>(MRI.getVRegDef(Abs->getSourceReg()));
+
+  Register Dst = Trunc->getReg(0);
+  LLT DstTy = MRI.getType(Dst);
+
+  GSext *SextLHS = cast<GSext>(MRI.getVRegDef(Sub->getLHSReg()));
+  GSext *SextRHS = cast<GSext>(MRI.getVRegDef(Sub->getRHSReg()));
+
+  LLT SextLHSTy = MRI.getType(SextLHS->getSrcReg());
+  LLT SextRHSTy = MRI.getType(SextRHS->getSrcReg());
+
+  // Both pre-extension sources must have the same type as the trunc result.
+  if (SextLHSTy != SextRHSTy || DstTy != SextLHSTy)
+    return false;
+
+  // The abs, the sub and both extends must each have a single non-debug use.
+  if (!MRI.hasOneNonDBGUse(Abs->getReg(0)) ||
+      !MRI.hasOneNonDBGUse(Sub->getReg(0)) ||
+      !MRI.hasOneNonDBGUse(Sub->getLHSReg()) ||
+      !MRI.hasOneNonDBGUse(Sub->getRHSReg()))
+    return false;
+
+  return isLegalOrBeforeLegalizer({TargetOpcode::G_ABDS, {DstTy}});
+}
+
+// trunc(abs(zext(x) - zext(y))) -> abdu(x, y)
+bool CombinerHelper::matchTruncAbdu(const MachineInstr &MI) {
+  const GTrunc *Trunc = cast<GTrunc>(&MI);
+  const GAbs *Abs = cast<GAbs>(MRI.getVRegDef(Trunc->getSrcReg()));
+  const GSub *Sub = cast<GSub>(MRI.getVRegDef(Abs->getSourceReg()));
+
+  Register Dst = Trunc->getReg(0);
+  LLT DstTy = MRI.getType(Dst);
+
+  GZext *ZextLHS = cast<GZext>(MRI.getVRegDef(Sub->getLHSReg()));
+  GZext *ZextRHS = cast<GZext>(MRI.getVRegDef(Sub->getRHSReg()));
+
+  LLT ZextLHSTy = MRI.getType(ZextLHS->getSrcReg());
+  LLT ZextRHSTy = MRI.getType(ZextRHS->getSrcReg());
+
+  // Both pre-extension sources must have the same type as the trunc result.
+  if (ZextLHSTy != ZextRHSTy || DstTy != ZextLHSTy)
+    return false;
+
+  // The abs, the sub and both extends must each have a single non-debug use.
+  if (!MRI.hasOneNonDBGUse(Abs->getReg(0)) ||
+      !MRI.hasOneNonDBGUse(Sub->getReg(0)) ||
+      !MRI.hasOneNonDBGUse(Sub->getLHSReg()) ||
+      !MRI.hasOneNonDBGUse(Sub->getRHSReg()))
+    return false;
+
+  return isLegalOrBeforeLegalizer({TargetOpcode::G_ABDU, {DstTy}});
 }
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelperSelect.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelperSelect.cpp
new file mode 100644
index 0000000000000..01125a8312e48
--- /dev/null
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelperSelect.cpp
@@ -0,0 +1,59 @@
+//===- CombinerHelperSelect.cpp--------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements CombinerHelper for G_SELECT.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/LowLevelTypeUtils.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/Support/Casting.h"
+
+#define DEBUG_TYPE "gi-combiner"
+
+using namespace llvm;
+
+// select(slt(lhs,rhs),sub(rhs,lhs),sub(lhs,rhs)) -> abds(lhs, rhs)
+bool CombinerHelper::matchSelectAbds(const MachineInstr &MI) {
+  const GSelect *Select = cast<GSelect>(&MI);
+  GSub *LHS = cast<GSub>(MRI.getVRegDef(Select->getTrueReg()));
+  GSub *RHS = cast<GSub>(MRI.getVRegDef(Select->getFalseReg()));
+
+  if (!MRI.hasOneNonDBGUse(Select->getCondReg()) ||
+      !MRI.hasOneNonDBGUse(LHS->getReg(0)) ||
+      !MRI.hasOneNonDBGUse(RHS->getReg(0)))
+    return false;
+
+  Register Dst = Select->getReg(0);
+  LLT DstTy = MRI.getType(Dst);
+
+  return isLegalOrBeforeLegalizer({TargetOpcode::G_ABDS, {DstTy}});
+}
+
+// select(ult(lhs,rhs),sub(rhs,lhs),sub(lhs,rhs)) -> abdu(lhs, rhs)
+bool CombinerHelper::matchSelectAbdu(const MachineInstr &MI) {
+  const GSelect *Select = cast<GSelect>(&MI);
+  GSub *LHS = cast<GSub>(MRI.getVRegDef(Select->getTrueReg()));
+  GSub *RHS = cast<GSub>(MRI.getVRegDef(Select->getFalseReg()));
+
+  if (!MRI.hasOneNonDBGUse(Select->getCondReg()) ||
+      !MRI.hasOneNonDBGUse(LHS->getReg(0)) ||
+      !MRI.hasOneNonDBGUse(RHS->getReg(0)))
+    return false;
+
+  Register Dst = Select->getReg(0);
+  LLT DstTy = MRI.getType(Dst);
+
+  return isLegalOrBeforeLegalizer({TargetOpcode::G_ABDU, {DstTy}});
+}
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 131c72a24964a..bc5b6136397a0 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -1439,6 +1439,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
 
   getActionDefinitionsBuilder({G_SCMP, G_UCMP}).lower();
 
+  getActionDefinitionsBuilder({G_ABDS, G_ABDU})
+      .legalFor({{v8s8}, {v4s16}, {v2s32}, {v16s8}, {v8s16}, {v4s32}});
   getLegacyLegalizerInfo().computeTables();
   verify(*ST.getInstrInfo());
 }
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
index 655e793f55c59..af4f916d96978 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
@@ -70,14 +70,14 @@
 # DEBUG-NEXT: .. the first uncovered type index: 1, OK
 # DEBUG-NEXT: .. the first uncovered imm index: 0, OK
 #
-# DEBUG-NEXT: G_ABDS (opcode [[G_ABDS:[0-9]+]]): 1 type index, 0 imm indices
-# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
-# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
+# DEBUG-NEXT: G_ABDS (opcode {{[0-9]+}}): 1 type index, 0 imm indices
+# DEBUG-NEXT: .. the first uncovered type index: 1, OK
+# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
 #
-# DEBUG-NEXT: G_ABDU (opcode [[G_ABDU:[0-9]+]]): 1 type index, 0 imm indices
-# DEBUG-NEXT: .. opcode [[G_ABDU]] is aliased to [[G_ABDS]]
-# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
-# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
+# DEBUG-NEXT: G_ABDU (opcode {{[0-9]+}}): 1 type index, 0 imm indices
+# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
+# DEBUG-NEXT: .. the first uncovered type index: 1, OK
+# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
 #
 # DEBUG-NEXT: G_UAVGFLOOR (opcode {{[0-9]+}}): 1 type index, 0 imm indices
 # DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
diff --git a/llvm/test/CodeGen/AArch64/abd-combine.ll b/llvm/test/CodeGen/AArch64/abd-combine.ll
index cdb40ceb46b1e..ff5ecc8fd2ce8 100644
--- a/llvm/test/CodeGen/AArch64/abd-combine.ll
+++ b/llvm/test/CodeGen/AArch64/abd-combine.ll
@@ -1,11 +1,21 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=aarch64 | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
 define <8 x i16> @abdu_base(<8 x i16> %src1, <8 x i16> %src2) {
-; CHECK-LABEL: abdu_base:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    uabd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abdu_base:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    uabd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abdu_base:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    usubl v2.4s, v0.4h, v1.4h
+; CHECK-GI-NEXT:    usubl2 v0.4s, v0.8h, v1.8h
+; CHECK-GI-NEXT:    abs v1.4s, v2.4s
+; CHECK-GI-NEXT:    abs v0.4s, v0.4s
+; CHECK-GI-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT:    ret
   %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
   %zextsrc2 = zext <8 x i16> %src2 to <8 x i32>
   %sub = sub <8 x i32> %zextsrc1, %zextsrc2
@@ -15,12 +25,24 @@ define <8 x i16> @abdu_base(<8 x i16> %src1, <8 x i16> %src2) {
 }
 
 define <8 x i16> @abdu_const(<8 x i16> %src1) {
-; CHECK-LABEL: abdu_const:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi v1.4h, #1
-; CHECK-NEXT:    mov v1.d[1], v1.d[0]
-; CHECK-NEXT:    uabd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abdu_const:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    movi v1.4h, #1
+; CHECK-SD-NEXT:    mov v1.d[1], v1.d[0]
+; CHECK-SD-NEXT:    uabd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abdu_const:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v1.4s, #1
+; CHECK-GI-NEXT:    ushll v2.4s, v0.4h, #0
+; CHECK-GI-NEXT:    ushll2 v0.4s, v0.8h, #0
+; CHECK-GI-NEXT:    sub v2.4s, v2.4s, v1.4s
+; CHECK-GI-NEXT:    sub v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT:    abs v1.4s, v2.4s
+; CHECK-GI-NEXT:    abs v0.4s, v0.4s
+; CHECK-GI-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT:    ret
   %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
   %sub = sub <8 x i32> %zextsrc1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
   %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
@@ -29,12 +51,22 @@ define <8 x i16> @abdu_const(<8 x i16> %src1) {
 }
 
 define <8 x i16> @abdu_const_lhs(<8 x i16> %src1) {
-; CHECK-LABEL: abdu_const_lhs:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi v1.4h, #1
-; CHECK-NEXT:    mov v1.d[1], v1.d[0]
-; CHECK-NEXT:    uabd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abdu_const_lhs:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    movi v1.4h, #1
+; CHECK-SD-NEXT:    mov v1.d[1], v1.d[0]
+; CHECK-SD-NEXT:    uabd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abdu_const_lhs:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v1.4s, #1
+; CHECK-GI-NEXT:    usubw v2.4s, v1.4s, v0.4h
+; CHECK-GI-NEXT:    usubw2 v0.4s, v1.4s, v0.8h
+; CHECK-GI-NEXT:    abs v1.4s, v2.4s
+; CHECK-GI-NEXT:    abs v0.4s, v0.4s
+; CHECK-GI-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT:    ret
   %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
   %sub = sub <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, %zextsrc1
   %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
@@ -43,9 +75,20 @@ define <8 x i16> @abdu_const_lhs(<8 x i16> %src1) {
 }
 
 define <8 x i16> @abdu_const_zero(<8 x i16> %src1) {
-; CHECK-LABEL: abdu_const_zero:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abdu_const_zero:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abdu_const_zero:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-GI-NEXT:    ushll v2.4s, v0.4h, #0
+; CHECK-GI-NEXT:    neg v2.4s, v2.4s
+; CHECK-GI-NEXT:    usubw2 v0.4s, v1.4s, v0.8h
+; CHECK-GI-NEXT:    abs v1.4s, v2.4s
+; CHECK-GI-NEXT:    abs v0.4s, v0.4s
+; CHECK-GI-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT:    ret
   %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
   %sub = sub <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, %zextsrc1
   %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
@@ -54,10 +97,17 @@ define <8 x i16> @abdu_const_zero(<8 x i16> %src1) {
 }
 
 define <8 x i16> @abdu_const_both() {
-; CHECK-LABEL: abdu_const_both:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi v0.8h, #2
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abdu_const_both:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    movi v0.8h, #2
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abdu_const_both:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v0.4s, #2
+; CHECK-GI-NEXT:    abs v0.4s, v0.4s
+; CHECK-GI-NEXT:    uzp1 v0.8h, v0.8h, v0.8h
+; CHECK-GI-NEXT:    ret
   %sub = sub <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
   %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
   %result = trunc <8 x i32> %abs to <8 x i16>
@@ -65,10 +115,19 @@ define <8 x i16> @abdu_const_both() {
 }
 
 define <8 x i16> @abdu_const_bothhigh() {
-; CHECK-LABEL: abdu_const_bothhigh:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi v0.8h, #1
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abdu_const_bothhigh:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    movi v0.8h, #1
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abdu_const_bothhigh:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi d0, #0xffffffffffffffff
+; CHECK-GI-NEXT:    mvni v1.4h, #1
+; CHECK-GI-NEXT:    usubl v0.4s, v1.4h, v0.4h
+; CHECK-GI-NEXT:    abs v0.4s, v0.4s
+; CHECK-GI-NEXT:    uzp1 v0.8h, v0.8h, v0.8h
+; CHECK-GI-NEXT:    ret
   %zextsrc1 = zext <8 x i16> <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534> to <8 x i32>
   %zextsrc2 = zext <8 x i16> <i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535> to <8 x i32>
   %sub = sub <8 x i32> %zextsrc1, %zextsrc2
@@ -78,9 +137,18 @@ define <8 x i16> @abdu_const_bothhigh() {
 }
 
 define <8 x i16> @abdu_undef(<8 x i16> %src1) {
-; CHECK-LABEL: abdu_undef:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abdu_undef:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abdu_undef:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    ushll v1.4s, v0.4h, #0
+; CHECK-GI-NEXT:    ushll2 v0.4s, v0.8h, #0
+; CHECK-GI-NEXT:    abs v1.4s, v1.4s
+; CHECK-GI-NEXT:    abs v0.4s, v0.4s
+; CHECK-GI-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT:    ret
   %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
   %zextsrc2 = zext <8 x i16> undef to <8 x i32>
   %sub = sub <8 x i32> %zextsrc1, %zextsrc2
@@ -90,10 +158,18 @@ define <8 x i16> @abdu_undef(<8 x i16> %src1) {
 }
 
 define <8 x i16> @abdu_ugt(<8 x i16>, <8 x i16>) {
-; CHECK-LABEL: abdu_ugt:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    uabd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abdu_ugt:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    uabd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abdu_ugt:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    cmhi v2.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    sub v3.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    sub v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT:    bit v0.16b, v3.16b, v2.16b
+; CHECK-GI-NEXT:    ret
   %3 = icmp ugt <8 x i16> %0, %1
   %4 = sub <8 x i16> %0, %1
   %5 = sub <8 x i16> %1, %0
@@ -102,10 +178,18 @@ define <8 x i16> @abdu_ugt(<8 x i16>, <8 x i16>) {
 }
 
 define <8 x i16> @abdu_uge(<8 x i16>, <8 x i16>) {
-; CHECK-LABEL: abdu_uge:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    uabd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abdu_uge:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    uabd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abdu_uge:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    cmhs v2.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    sub v3.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    sub v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT:    bit v0.16b, v3.16b, v2.16b
+; CHECK-GI-NEXT:    ret
   %3 = icmp uge <8 x i16> %0, %1
   %4 = sub <8 x i16> %0, %1
   %5 = sub <8 x i16> %1, %0
@@ -114,10 +198,18 @@ define <8 x i16> @abdu_uge(<8 x i16>, <8 x i16>) {
 }
 
 define <8 x i16> @abdu_ult(<8 x i16>, <8 x i16>) {
-; CHECK-LABEL: abdu_ult:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    uabd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abdu_ult:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    uabd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abdu_ult:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    cmhi v2.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT:    sub v3.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    sub v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT:    bif v0.16b, v3.16b, v2.16b
+; CHECK-GI-NEXT:    ret
   %3 = icmp ult <8 x i16> %0, %1
   %4 = sub <8 x i16> %0, %1
   %5 = sub <8 x i16> %1, %0
@@ -126,10 +218,18 @@ define <8 x i16> @abdu_ult(<8 x i16>, <8 x i16>) {
 }
 
 define <8 x i16> @abdu_ule(<8 x i16>, <8 x i16>) {
-; CHECK-LABEL: abdu_ule:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    uabd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abdu_ule:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    uabd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abdu_ule:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    cmhs v2.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT:    sub v3.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    sub v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT:    bif v0.16b, v3.16b, v2.16b
+; CHECK-GI-NEXT:    ret
   %3 = icmp ule <8 x i16> %0, %1
   %4 = sub <8 x i16> %0, %1
   %5 = sub <8 x i16> %1, %0
@@ -138,10 +238,18 @@ define <8 x i16> @abdu_ule(<8 x i16>, <8 x i16>) {
 }
 
 define <8 x i16> @abds_sgt(<8 x i16>, <8 x i16>) {
-; CHECK-LABEL: abds_sgt:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    sabd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abds_sgt:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sabd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abds_sgt:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    cmgt v2.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    sub v3.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    sub v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT:    bit v0.16b, v3.16b, v2.16b
+; CHECK-GI-NEXT:    ret
   %3 = icmp sgt <8 x i16> %0, %1
   %4 = sub <8 x i16> %0, %1
   %5 = sub <8 x i16> %1, %0
@@ -150,10 +258,18 @@ define <8 x i16> @abds_sgt(<8 x i16>, <8 x i16>) {
 }
 
 define <8 x i16> @abds_sge(<8 x i16>, <8 x i16>) {
-; CHECK-LABEL: abds_sge:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    sabd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abds_sge:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sabd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abds_sge:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    cmge v2.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    sub v3.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    sub v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT:    bit v0.16b, v3.16b, v2.16b
+; CHECK-GI-NEXT:    ret
   %3 = icmp sge <8 x i16> %0, %1
   %4 = sub <8 x i16> %0, %1
   %5 = sub <8 x i16> %1, %0
@@ -162,10 +278,18 @@ define <8 x i16> @abds_sge(<8 x i16>, <8 x i16>) {
 }
 
 define <8 x i16> @abds_slt(<8 x i16>, <8 x i16>) {
-; CHECK-LABEL: abds_slt:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    sabd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abds_slt:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sabd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abds_slt:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    cmgt v2.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT:    sub v3.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    sub v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT:    bif v0.16b, v3.16b, v2.16b
+; CHECK-GI-NEXT:    ret
   %3 = icmp slt <8 x i16> %0, %1
   %4 = sub <8 x i16> %0, %1
   %5 = sub <8 x i16> %1, %0
@@ -174,10 +298,18 @@ define <8 x i16> @abds_slt(<8 x i16>, <8 x i16>) {
 }
 
 define <8 x i16> @abds_sle(<8 x i16>, <8 x i16>) {
-; CHECK-LABEL: abds_sle:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    sabd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abds_sle:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sabd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abds_sle:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    cmge v2.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT:    sub v3.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    sub v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT:    bif v0.16b, v3.16b, v2.16b
+; CHECK-GI-NEXT:    ret
   %3 = icmp sle <8 x i16> %0, %1
   %4 = sub <8 x i16> %0, %1
   %5 = sub <8 x i16> %1, %0
@@ -206,75 +338,128 @@ define <8 x i16> @abdu_i_const(<8 x i16> %src1) {
 }
 
 define <8 x i16> @abdu_i_const_lhs(<8 x i16> %src1) {
-; CHECK-LABEL: abdu_i_const_lhs:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi v1.8h, #1
-; CHECK-NEXT:    uabd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abdu_i_const_lhs:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    movi v1.8h, #1
+; CHECK-SD-NEXT:    uabd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abdu_i_const_lhs:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v1.8h, #1
+; CHECK-GI-NEXT:    uabd v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT:    ret
   %result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <8 x i16> %src1)
   ret <8 x i16> %result
 }
 
 define <8 x i16> @abdu_i_const_zero(float %t, <8 x i16> %src1) {
-; CHECK-LABEL: abdu_i_const_zero:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov v0.16b, v1.16b
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abdu_i_const_zero:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    mov v0.16b, v1.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abdu_i_const_zero:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-GI-NEXT:    uabd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    ret
   %result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <8 x i16> %src1)
   ret <8 x i16> %result
 }
 
 define <8 x i16> @abdu_i_const_both() {
-; CHECK-LABEL: abdu_i_const_both:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi v0.8h, #2
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abdu_i_const_both:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    movi v0.8h, #2
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abdu_i_const_both:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v0.8h, #3
+; CHECK-GI-NEXT:    movi v1.8h, #1
+; CHECK-GI-NEXT:    uabd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    ret
   %result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
   ret <8 x i16> %result
 }
 
 define <8 x i16> @abdu_i_const_bothhigh() {
-; CHECK-LABEL: abdu_i_const_bothhigh:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi v0.8h, #1
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abdu_i_const_bothhigh:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    movi v0.8h, #1
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abdu_i_const_bothhigh:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v0.2d, #0xffffffffffffffff
+; CHECK-GI-NEXT:    mvni v1.8h, #1
+; CHECK-GI-NEXT:    uabd v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT:    ret
   %result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534>, <8 x i16> <i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535>)
   ret <8 x i16> %result
 }
 
 define <8 x i16> @abdu_i_const_onehigh() {
-; CHECK-LABEL: abdu_i_const_onehigh:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #32765 // =0x7ffd
-; CHECK-NEXT:    dup v0.8h, w8
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abdu_i_const_onehigh:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    mov w8, #32765 // =0x7ffd
+; CHECK-SD-NEXT:    dup v0.8h, w8
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abdu_i_const_onehigh:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v0.8h, #1
+; CHECK-GI-NEXT:    adrp x8, .LCPI21_0
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI21_0]
+; CHECK-GI-NEXT:    uabd v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT:    ret
   %result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
   ret <8 x i16> %result
 }
 
 define <8 x i16> @abdu_i_const_oneneg() {
-; CHECK-LABEL: abdu_i_const_oneneg:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi v0.8h, #128, lsl #8
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abdu_i_const_oneneg:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    movi v0.8h, #128, lsl #8
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abdu_i_const_oneneg:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    adrp x8, .LCPI22_0
+; CHECK-GI-NEXT:    mvni v0.8h, #1
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI22_0]
+; CHECK-GI-NEXT:    uabd v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT:    ret
   %result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>, <8 x i16> <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534>)
   ret <8 x i16> %result
 }
 
 define <8 x i16> @abdu_i_zero(<8 x i16> %t, <8 x i16> %src1) {
-; CHECK-LABEL: abdu_i_zero:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov v0.16b, v1.16b
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abdu_i_zero:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    mov v0.16b, v1.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abdu_i_zero:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-GI-NEXT:    uabd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    ret
   %result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <8 x i16> %src1)
   ret <8 x i16> %result
 }
 
 define <8 x i16> @abdu_i_undef(<8 x i16> %t, <8 x i16> %src1) {
-; CHECK-LABEL: abdu_i_undef:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi v0.2d, #0000000000000000
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abdu_i_undef:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abdu_i_undef:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    uabd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    ret
   %result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> undef, <8 x i16> %src1)
   ret <8 x i16> %result
 }
@@ -297,10 +482,19 @@ define <8 x i16> @abdu_i_reassoc(<8 x i16> %src1) {
 
 
 define <8 x i16> @abds_base(<8 x i16> %src1, <8 x i16> %src2) {
-; CHECK-LABEL: abds_base:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    sabd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abds_base:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sabd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abds_base:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    ssubl v2.4s, v0.4h, v1.4h
+; CHECK-GI-NEXT:    ssubl2 v0.4s, v0.8h, v1.8h
+; CHECK-GI-NEXT:    abs v1.4s, v2.4s
+; CHECK-GI-NEXT:    abs v0.4s, v0.4s
+; CHECK-GI-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT:    ret
   %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
   %zextsrc2 = sext <8 x i16> %src2 to <8 x i32>
   %sub = sub <8 x i32> %zextsrc1, %zextsrc2
@@ -310,12 +504,24 @@ define <8 x i16> @abds_base(<8 x i16> %src1, <8 x i16> %src2) {
 }
 
 define <8 x i16> @abds_const(<8 x i16> %src1) {
-; CHECK-LABEL: abds_const:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi v1.4h, #1
-; CHECK-NEXT:    mov v1.d[1], v1.d[0]
-; CHECK-NEXT:    sabd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abds_const:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    movi v1.4h, #1
+; CHECK-SD-NEXT:    mov v1.d[1], v1.d[0]
+; CHECK-SD-NEXT:    sabd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abds_const:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v1.4s, #1
+; CHECK-GI-NEXT:    sshll v2.4s, v0.4h, #0
+; CHECK-GI-NEXT:    sshll2 v0.4s, v0.8h, #0
+; CHECK-GI-NEXT:    sub v2.4s, v2.4s, v1.4s
+; CHECK-GI-NEXT:    sub v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT:    abs v1.4s, v2.4s
+; CHECK-GI-NEXT:    abs v0.4s, v0.4s
+; CHECK-GI-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT:    ret
   %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
   %sub = sub <8 x i32> %zextsrc1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
   %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
@@ -324,12 +530,22 @@ define <8 x i16> @abds_const(<8 x i16> %src1) {
 }
 
 define <8 x i16> @abds_const_lhs(<8 x i16> %src1) {
-; CHECK-LABEL: abds_const_lhs:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi v1.4h, #1
-; CHECK-NEXT:    mov v1.d[1], v1.d[0]
-; CHECK-NEXT:    sabd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abds_const_lhs:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    movi v1.4h, #1
+; CHECK-SD-NEXT:    mov v1.d[1], v1.d[0]
+; CHECK-SD-NEXT:    sabd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abds_const_lhs:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v1.4s, #1
+; CHECK-GI-NEXT:    ssubw v2.4s, v1.4s, v0.4h
+; CHECK-GI-NEXT:    ssubw2 v0.4s, v1.4s, v0.8h
+; CHECK-GI-NEXT:    abs v1.4s, v2.4s
+; CHECK-GI-NEXT:    abs v0.4s, v0.4s
+; CHECK-GI-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT:    ret
   %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
   %sub = sub <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, %zextsrc1
   %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
@@ -338,13 +554,24 @@ define <8 x i16> @abds_const_lhs(<8 x i16> %src1) {
 }
 
 define <8 x i16> @abds_const_zero(<8 x i16> %src1) {
-; CHECK-LABEL: abds_const_zero:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT:    abs v0.4h, v0.4h
-; CHECK-NEXT:    abs v1.4h, v1.4h
-; CHECK-NEXT:    mov v0.d[1], v1.d[0]
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abds_const_zero:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    abs v0.4h, v0.4h
+; CHECK-SD-NEXT:    abs v1.4h, v1.4h
+; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abds_const_zero:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-GI-NEXT:    sshll v2.4s, v0.4h, #0
+; CHECK-GI-NEXT:    neg v2.4s, v2.4s
+; CHECK-GI-NEXT:    ssubw2 v0.4s, v1.4s, v0.8h
+; CHECK-GI-NEXT:    abs v1.4s, v2.4s
+; CHECK-GI-NEXT:    abs v0.4s, v0.4s
+; CHECK-GI-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT:    ret
   %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
   %sub = sub <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, %zextsrc1
   %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
@@ -353,10 +580,17 @@ define <8 x i16> @abds_const_zero(<8 x i16> %src1) {
 }
 
 define <8 x i16> @abds_const_both() {
-; CHECK-LABEL: abds_const_both:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi v0.8h, #2
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abds_const_both:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    movi v0.8h, #2
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abds_const_both:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v0.4s, #2
+; CHECK-GI-NEXT:    abs v0.4s, v0.4s
+; CHECK-GI-NEXT:    uzp1 v0.8h, v0.8h, v0.8h
+; CHECK-GI-NEXT:    ret
   %sub = sub <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
   %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
   %result = trunc <8 x i32> %abs to <8 x i16>
@@ -364,10 +598,19 @@ define <8 x i16> @abds_const_both() {
 }
 
 define <8 x i16> @abds_const_bothhigh() {
-; CHECK-LABEL: abds_const_bothhigh:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi v0.8h, #1
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abds_const_bothhigh:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    movi v0.8h, #1
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abds_const_bothhigh:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi d0, #0xffffffffffffffff
+; CHECK-GI-NEXT:    mvni v1.4h, #1
+; CHECK-GI-NEXT:    ssubl v0.4s, v1.4h, v0.4h
+; CHECK-GI-NEXT:    abs v0.4s, v0.4s
+; CHECK-GI-NEXT:    uzp1 v0.8h, v0.8h, v0.8h
+; CHECK-GI-NEXT:    ret
   %zextsrc1 = sext <8 x i16> <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534> to <8 x i32>
   %zextsrc2 = sext <8 x i16> <i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535> to <8 x i32>
   %sub = sub <8 x i32> %zextsrc1, %zextsrc2
@@ -377,14 +620,23 @@ define <8 x i16> @abds_const_bothhigh() {
 }
 
 define <8 x i16> @abds_undef(<8 x i16> %src1) {
-; CHECK-LABEL: abds_undef:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    sshll v1.4s, v0.4h, #0
-; CHECK-NEXT:    sshll2 v0.4s, v0.8h, #0
-; CHECK-NEXT:    abs v0.4s, v0.4s
-; CHECK-NEXT:    abs v1.4s, v1.4s
-; CHECK-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abds_undef:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sshll v1.4s, v0.4h, #0
+; CHECK-SD-NEXT:    sshll2 v0.4s, v0.8h, #0
+; CHECK-SD-NEXT:    abs v0.4s, v0.4s
+; CHECK-SD-NEXT:    abs v1.4s, v1.4s
+; CHECK-SD-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abds_undef:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    sshll v1.4s, v0.4h, #0
+; CHECK-GI-NEXT:    sshll2 v0.4s, v0.8h, #0
+; CHECK-GI-NEXT:    abs v1.4s, v1.4s
+; CHECK-GI-NEXT:    abs v0.4s, v0.4s
+; CHECK-GI-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT:    ret
   %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
   %zextsrc2 = sext <8 x i16> undef to <8 x i32>
   %sub = sub <8 x i32> %zextsrc1, %zextsrc2
@@ -415,75 +667,129 @@ define <8 x i16> @abds_i_const(<8 x i16> %src1) {
 }
 
 define <8 x i16> @abds_i_const_lhs(<8 x i16> %src1) {
-; CHECK-LABEL: abds_i_const_lhs:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi v1.8h, #1
-; CHECK-NEXT:    sabd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abds_i_const_lhs:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    movi v1.8h, #1
+; CHECK-SD-NEXT:    sabd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abds_i_const_lhs:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v1.8h, #1
+; CHECK-GI-NEXT:    sabd v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT:    ret
   %result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <8 x i16> %src1)
   ret <8 x i16> %result
 }
 
 define <8 x i16> @abds_i_const_zero(<8 x i16> %src1) {
-; CHECK-LABEL: abds_i_const_zero:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    abs v0.8h, v0.8h
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abds_i_const_zero:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    abs v0.8h, v0.8h
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abds_i_const_zero:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-GI-NEXT:    sabd v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT:    ret
   %result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <8 x i16> %src1)
   ret <8 x i16> %result
 }
 
 define <8 x i16> @abds_i_const_both() {
-; CHECK-LABEL: abds_i_const_both:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi v0.8h, #2
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abds_i_const_both:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    movi v0.8h, #2
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abds_i_const_both:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v0.8h, #3
+; CHECK-GI-NEXT:    movi v1.8h, #1
+; CHECK-GI-NEXT:    sabd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    ret
   %result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
   ret <8 x i16> %result
 }
 
 define <8 x i16> @abds_i_const_bothhigh() {
-; CHECK-LABEL: abds_i_const_bothhigh:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi v0.8h, #1
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abds_i_const_bothhigh:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    movi v0.8h, #1
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abds_i_const_bothhigh:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    adrp x8, .LCPI38_0
+; CHECK-GI-NEXT:    mvni v0.8h, #128, lsl #8
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI38_0]
+; CHECK-GI-NEXT:    sabd v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT:    ret
   %result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>, <8 x i16> <i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767>)
   ret <8 x i16> %result
 }
 
 define <8 x i16> @abds_i_const_onehigh() {
-; CHECK-LABEL: abds_i_const_onehigh:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #32765 // =0x7ffd
-; CHECK-NEXT:    dup v0.8h, w8
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abds_i_const_onehigh:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    mov w8, #32765 // =0x7ffd
+; CHECK-SD-NEXT:    dup v0.8h, w8
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abds_i_const_onehigh:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v0.8h, #1
+; CHECK-GI-NEXT:    adrp x8, .LCPI39_0
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI39_0]
+; CHECK-GI-NEXT:    sabd v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT:    ret
   %result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
   ret <8 x i16> %result
 }
 
 define <8 x i16> @abds_i_const_oneneg() {
-; CHECK-LABEL: abds_i_const_oneneg:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi v0.8h, #128, lsl #8
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abds_i_const_oneneg:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    movi v0.8h, #128, lsl #8
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abds_i_const_oneneg:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    adrp x8, .LCPI40_0
+; CHECK-GI-NEXT:    mvni v0.8h, #1
+; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI40_0]
+; CHECK-GI-NEXT:    sabd v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT:    ret
   %result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>, <8 x i16> <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534>)
   ret <8 x i16> %result
 }
 
 define <8 x i16> @abds_i_zero(<8 x i16> %t, <8 x i16> %src1) {
-; CHECK-LABEL: abds_i_zero:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    abs v0.8h, v1.8h
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abds_i_zero:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    abs v0.8h, v1.8h
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abds_i_zero:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-GI-NEXT:    sabd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    ret
   %result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <8 x i16> %src1)
   ret <8 x i16> %result
 }
 
 define <8 x i16> @abds_i_undef(<8 x i16> %t, <8 x i16> %src1) {
-; CHECK-LABEL: abds_i_undef:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi v0.2d, #0000000000000000
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: abds_i_undef:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abds_i_undef:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    sabd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    ret
   %result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> undef, <8 x i16> %src1)
   ret <8 x i16> %result
 }
@@ -502,14 +808,28 @@ define <8 x i16> @abds_i_reassoc(<8 x i16> %src1) {
 }
 
 define <1 x i64> @recursive() {
-; CHECK-LABEL: recursive:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi v0.8b, #254
-; CHECK-NEXT:    ushll v1.8h, v0.8b, #0
-; CHECK-NEXT:    dup v0.8b, v0.b[0]
-; CHECK-NEXT:    saddlp v1.1d, v1.2s
-; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: recursive:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    movi v0.8b, #254
+; CHECK-SD-NEXT:    ushll v1.8h, v0.8b, #0
+; CHECK-SD-NEXT:    dup v0.8b, v0.b[0]
+; CHECK-SD-NEXT:    saddlp v1.1d, v1.2s
+; CHECK-SD-NEXT:    orr v0.8b, v0.8b, v1.8b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: recursive:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi d0, #0xffffffffffffffff
+; CHECK-GI-NEXT:    movi v1.8b, #1
+; CHECK-GI-NEXT:    uabd v2.8b, v0.8b, v1.8b
+; CHECK-GI-NEXT:    uabdl v0.8h, v0.8b, v1.8b
+; CHECK-GI-NEXT:    dup v1.16b, v2.b[0]
+; CHECK-GI-NEXT:    saddlp v0.1d, v0.2s
+; CHECK-GI-NEXT:    fmov x8, d1
+; CHECK-GI-NEXT:    fmov x9, d0
+; CHECK-GI-NEXT:    orr x8, x8, x9
+; CHECK-GI-NEXT:    fmov d0, x8
+; CHECK-GI-NEXT:    ret
   %1 = tail call <8 x i8> @llvm.aarch64.neon.umax.v8i8(<8 x i8> zeroinitializer, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
   %2 = tail call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %1, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
   %3 = zext <8 x i8> %2 to <8 x i16>
@@ -523,6 +843,43 @@ define <1 x i64> @recursive() {
   ret <1 x i64> %10
 }
 
+define <8 x i16> @abds_sub_smax(<8 x i16> %src1, <8 x i16> %src2) {
+; CHECK-SD-LABEL: abds_sub_smax:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sabd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abds_sub_smax:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    smax v2.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    smin v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    sub v0.8h, v2.8h, v0.8h
+; CHECK-GI-NEXT:    ret
+  %smax = call <8 x i16> @llvm.smax(<8 x i16> %src1, <8 x i16> %src2)
+  %smin = call <8 x i16> @llvm.smin(<8 x i16> %src1, <8 x i16> %src2)
+  %result = sub <8 x i16> %smax, %smin
+  ret <8 x i16> %result
+}
+
+define <8 x i16> @abdu_sub_umax(<8 x i16> %src1, <8 x i16> %src2) {
+; CHECK-SD-LABEL: abdu_sub_umax:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    uabd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: abdu_sub_umax:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    umax v2.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    umin v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT:    sub v0.8h, v2.8h, v0.8h
+; CHECK-GI-NEXT:    ret
+  %umax = call <8 x i16> @llvm.umax(<8 x i16> %src1, <8 x i16> %src2)
+  %umin = call <8 x i16> @llvm.umin(<8 x i16> %src1, <8 x i16> %src2)
+  %result = sub <8 x i16> %umax, %umin
+  ret <8 x i16> %result
+}
+
+
 declare <8 x i8> @llvm.aarch64.neon.umax.v8i8(<8 x i8>, <8 x i8>)
 declare <1 x i64> @llvm.aarch64.neon.saddlp.v1i64.v2i32(<2 x i32>)
 declare <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8>, <8 x i8>)



More information about the llvm-commits mailing list