[llvm] [AArch64] Transform add(x, abs(y)) -> saba(x, y, 0) (PR #156615)
Hari Limaye via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 4 00:36:18 PDT 2025
https://github.com/hazzlim updated https://github.com/llvm/llvm-project/pull/156615
>From 5e31eef765246e51c849618516fbd6162b5b6260 Mon Sep 17 00:00:00 2001
From: Hari Limaye <hari.limaye at arm.com>
Date: Mon, 1 Sep 2025 14:11:57 +0000
Subject: [PATCH 1/4] [AArch64] Add test coverage for some aba/abal cases (NFC)
---
llvm/test/CodeGen/AArch64/neon-saba.ll | 262 +++++++++++++++++++++++++
1 file changed, 262 insertions(+)
diff --git a/llvm/test/CodeGen/AArch64/neon-saba.ll b/llvm/test/CodeGen/AArch64/neon-saba.ll
index 19967bd1a69ec..2cb5053e07d6a 100644
--- a/llvm/test/CodeGen/AArch64/neon-saba.ll
+++ b/llvm/test/CodeGen/AArch64/neon-saba.ll
@@ -174,6 +174,268 @@ define <8 x i8> @saba_sabd_8b(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 {
ret <8 x i8> %add
}
+; SABA from ADD(SABD(X, ZEROS))
+
+define <4 x i32> @saba_sabd_zeros_4s(<4 x i32> %a, <4 x i32> %b) #0 {
+; CHECK-SD-LABEL: saba_sabd_zeros_4s:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: abs v1.4s, v1.4s
+; CHECK-SD-NEXT: add v0.4s, v1.4s, v0.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: saba_sabd_zeros_4s:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
+; CHECK-GI-NEXT: saba v0.4s, v1.4s, v2.4s
+; CHECK-GI-NEXT: ret
+ %sabd = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %b, <4 x i32> zeroinitializer)
+ %add = add <4 x i32> %sabd, %a
+ ret <4 x i32> %add
+}
+
+define <2 x i32> @saba_sabd_zeros_2s(<2 x i32> %a, <2 x i32> %b) #0 {
+; CHECK-SD-LABEL: saba_sabd_zeros_2s:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: abs v1.2s, v1.2s
+; CHECK-SD-NEXT: add v0.2s, v1.2s, v0.2s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: saba_sabd_zeros_2s:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
+; CHECK-GI-NEXT: saba v0.2s, v1.2s, v2.2s
+; CHECK-GI-NEXT: ret
+ %sabd = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %b, <2 x i32> zeroinitializer)
+ %add = add <2 x i32> %sabd, %a
+ ret <2 x i32> %add
+}
+
+define <8 x i16> @saba_sabd_zeros_8h(<8 x i16> %a, <8 x i16> %b) #0 {
+; CHECK-SD-LABEL: saba_sabd_zeros_8h:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: abs v1.8h, v1.8h
+; CHECK-SD-NEXT: add v0.8h, v1.8h, v0.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: saba_sabd_zeros_8h:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
+; CHECK-GI-NEXT: saba v0.8h, v1.8h, v2.8h
+; CHECK-GI-NEXT: ret
+ %sabd = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %b, <8 x i16> zeroinitializer)
+ %add = add <8 x i16> %sabd, %a
+ ret <8 x i16> %add
+}
+
+define <4 x i16> @saba_sabd_zeros_4h(<4 x i16> %a, <4 x i16> %b) #0 {
+; CHECK-SD-LABEL: saba_sabd_zeros_4h:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: abs v1.4h, v1.4h
+; CHECK-SD-NEXT: add v0.4h, v1.4h, v0.4h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: saba_sabd_zeros_4h:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
+; CHECK-GI-NEXT: saba v0.4h, v1.4h, v2.4h
+; CHECK-GI-NEXT: ret
+ %sabd = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %b, <4 x i16> zeroinitializer)
+ %add = add <4 x i16> %sabd, %a
+ ret <4 x i16> %add
+}
+
+define <16 x i8> @saba_sabd_zeros_16b(<16 x i8> %a, <16 x i8> %b) #0 {
+; CHECK-SD-LABEL: saba_sabd_zeros_16b:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: abs v1.16b, v1.16b
+; CHECK-SD-NEXT: add v0.16b, v1.16b, v0.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: saba_sabd_zeros_16b:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
+; CHECK-GI-NEXT: saba v0.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT: ret
+ %sabd = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %b, <16 x i8> zeroinitializer)
+ %add = add <16 x i8> %sabd, %a
+ ret <16 x i8> %add
+}
+
+define <8 x i8> @saba_sabd_zeros_8b(<8 x i8> %a, <8 x i8> %b) #0 {
+; CHECK-SD-LABEL: saba_sabd_zeros_8b:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: abs v1.8b, v1.8b
+; CHECK-SD-NEXT: add v0.8b, v1.8b, v0.8b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: saba_sabd_zeros_8b:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
+; CHECK-GI-NEXT: saba v0.8b, v1.8b, v2.8b
+; CHECK-GI-NEXT: ret
+ %sabd = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %b, <8 x i8> zeroinitializer)
+ %add = add <8 x i8> %sabd, %a
+ ret <8 x i8> %add
+}
+
+define <4 x i32> @saba_abs_zeros_4s(<4 x i32> %a, <4 x i32> %b) #0 {
+; CHECK-LABEL: saba_abs_zeros_4s:
+; CHECK: // %bb.0:
+; CHECK-NEXT: abs v1.4s, v1.4s
+; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: ret
+ %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %b, i1 true)
+ %add = add <4 x i32> %a, %abs
+ ret <4 x i32> %add
+}
+
+define <2 x i32> @saba_abs_zeros_2s(<2 x i32> %a, <2 x i32> %b) #0 {
+; CHECK-LABEL: saba_abs_zeros_2s:
+; CHECK: // %bb.0:
+; CHECK-NEXT: abs v1.2s, v1.2s
+; CHECK-NEXT: add v0.2s, v0.2s, v1.2s
+; CHECK-NEXT: ret
+ %abs = call <2 x i32> @llvm.abs.v2i32(<2 x i32> %b, i1 true)
+ %add = add <2 x i32> %a, %abs
+ ret <2 x i32> %add
+}
+
+define <8 x i16> @saba_abs_zeros_8h(<8 x i16> %a, <8 x i16> %b) #0 {
+; CHECK-LABEL: saba_abs_zeros_8h:
+; CHECK: // %bb.0:
+; CHECK-NEXT: abs v1.8h, v1.8h
+; CHECK-NEXT: add v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: ret
+ %abs = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %b, i1 true)
+ %add = add <8 x i16> %a, %abs
+ ret <8 x i16> %add
+}
+
+define <4 x i16> @saba_abs_zeros_4h(<4 x i16> %a, <4 x i16> %b) #0 {
+; CHECK-LABEL: saba_abs_zeros_4h:
+; CHECK: // %bb.0:
+; CHECK-NEXT: abs v1.4h, v1.4h
+; CHECK-NEXT: add v0.4h, v0.4h, v1.4h
+; CHECK-NEXT: ret
+ %abs = call <4 x i16> @llvm.abs.v4i16(<4 x i16> %b, i1 true)
+ %add = add <4 x i16> %a, %abs
+ ret <4 x i16> %add
+}
+
+define <16 x i8> @saba_abs_zeros_16b(<16 x i8> %a, <16 x i8> %b) #0 {
+; CHECK-LABEL: saba_abs_zeros_16b:
+; CHECK: // %bb.0:
+; CHECK-NEXT: abs v1.16b, v1.16b
+; CHECK-NEXT: add v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: ret
+ %abs = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %b, i1 true)
+ %add = add <16 x i8> %a, %abs
+ ret <16 x i8> %add
+}
+
+define <8 x i8> @saba_abs_zeros_8b(<8 x i8> %a, <8 x i8> %b) #0 {
+; CHECK-LABEL: saba_abs_zeros_8b:
+; CHECK: // %bb.0:
+; CHECK-NEXT: abs v1.8b, v1.8b
+; CHECK-NEXT: add v0.8b, v0.8b, v1.8b
+; CHECK-NEXT: ret
+ %abs = call <8 x i8> @llvm.abs.v8i8(<8 x i8> %b, i1 true)
+ %add = add <8 x i8> %a, %abs
+ ret <8 x i8> %add
+}
+
+; SABAL from ADD(ZEXT(SABD(X, ZEROS)))
+
+define <2 x i64> @sabal_sabd_zeros_2s(<2 x i64> %a, <2 x i32> %b) #0 {
+; CHECK-SD-LABEL: sabal_sabd_zeros_2s:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: abs v1.2s, v1.2s
+; CHECK-SD-NEXT: uaddw v0.2d, v0.2d, v1.2s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: sabal_sabd_zeros_2s:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
+; CHECK-GI-NEXT: sabal v0.2d, v1.2s, v2.2s
+; CHECK-GI-NEXT: ret
+ %sabd = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %b, <2 x i32> zeroinitializer)
+ %sabd.zext = zext <2 x i32> %sabd to <2 x i64>
+ %add = add <2 x i64> %sabd.zext, %a
+ ret <2 x i64> %add
+}
+
+define <4 x i32> @sabal_sabd_zeros_4h(<4 x i32> %a, <4 x i16> %b) #0 {
+; CHECK-SD-LABEL: sabal_sabd_zeros_4h:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: abs v1.4h, v1.4h
+; CHECK-SD-NEXT: uaddw v0.4s, v0.4s, v1.4h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: sabal_sabd_zeros_4h:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
+; CHECK-GI-NEXT: sabal v0.4s, v1.4h, v2.4h
+; CHECK-GI-NEXT: ret
+ %sabd = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %b, <4 x i16> zeroinitializer)
+ %sabd.zext = zext <4 x i16> %sabd to <4 x i32>
+ %add = add <4 x i32> %sabd.zext, %a
+ ret <4 x i32> %add
+}
+
+define <8 x i16> @sabal_sabd_zeros_8b(<8 x i16> %a, <8 x i8> %b) #0 {
+; CHECK-SD-LABEL: sabal_sabd_zeros_8b:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: abs v1.8b, v1.8b
+; CHECK-SD-NEXT: uaddw v0.8h, v0.8h, v1.8b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: sabal_sabd_zeros_8b:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
+; CHECK-GI-NEXT: sabal v0.8h, v1.8b, v2.8b
+; CHECK-GI-NEXT: ret
+ %sabd = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %b, <8 x i8> zeroinitializer)
+ %sabd.zext = zext <8 x i8> %sabd to <8 x i16>
+ %add = add <8 x i16> %sabd.zext, %a
+ ret <8 x i16> %add
+}
+
+define <2 x i64> @sabal_abs_zeros_2s(<2 x i64> %a, <2 x i32> %b) #0 {
+; CHECK-LABEL: sabal_abs_zeros_2s:
+; CHECK: // %bb.0:
+; CHECK-NEXT: abs v1.2s, v1.2s
+; CHECK-NEXT: uaddw v0.2d, v0.2d, v1.2s
+; CHECK-NEXT: ret
+ %abs = call <2 x i32> @llvm.abs.v2i32(<2 x i32> %b, i1 true)
+ %abs.zext = zext <2 x i32> %abs to <2 x i64>
+ %add = add <2 x i64> %a, %abs.zext
+ ret <2 x i64> %add
+}
+
+define <4 x i32> @sabal_abs_zeros_4h(<4 x i32> %a, <4 x i16> %b) #0 {
+; CHECK-LABEL: sabal_abs_zeros_4h:
+; CHECK: // %bb.0:
+; CHECK-NEXT: abs v1.4h, v1.4h
+; CHECK-NEXT: uaddw v0.4s, v0.4s, v1.4h
+; CHECK-NEXT: ret
+ %abs = call <4 x i16> @llvm.abs.v4i16(<4 x i16> %b, i1 true)
+ %abs.zext = zext <4 x i16> %abs to <4 x i32>
+ %add = add <4 x i32> %a, %abs.zext
+ ret <4 x i32> %add
+}
+
+define <8 x i16> @sabal_abs_zeros_8b(<8 x i16> %a, <8 x i8> %b) #0 {
+; CHECK-LABEL: sabal_abs_zeros_8b:
+; CHECK: // %bb.0:
+; CHECK-NEXT: abs v1.8b, v1.8b
+; CHECK-NEXT: uaddw v0.8h, v0.8h, v1.8b
+; CHECK-NEXT: ret
+ %abs = call <8 x i8> @llvm.abs.v8i8(<8 x i8> %b, i1 true)
+ %abs.zext = zext <8 x i8> %abs to <8 x i16>
+ %add = add <8 x i16> %a, %abs.zext
+ ret <8 x i16> %add
+}
+
declare <4 x i32> @llvm.abs.v4i32(<4 x i32>, i1)
declare <2 x i32> @llvm.abs.v2i32(<2 x i32>, i1)
declare <8 x i16> @llvm.abs.v8i16(<8 x i16>, i1)
>From d50b01b2a88e875dd0502ec10c2bdd71bd4be339 Mon Sep 17 00:00:00 2001
From: Hari Limaye <hari.limaye at arm.com>
Date: Tue, 2 Sep 2025 15:03:11 +0000
Subject: [PATCH 2/4] [AArch64] Transform add(x, abs(y)) -> saba(x, y, 0)
---
.../Target/AArch64/AArch64ISelLowering.cpp | 54 ++++
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 25 ++
llvm/test/CodeGen/AArch64/neon-saba.ll | 294 +++++++++---------
3 files changed, 226 insertions(+), 147 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index d3515cf81f443..1bbfcf9fe5206 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -50,6 +50,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SDPatternMatch.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetCallingConv.h"
@@ -21914,6 +21915,56 @@ static SDValue performExtBinopLoadFold(SDNode *N, SelectionDAG &DAG) {
return DAG.getNode(N->getOpcode(), DL, VT, Ext0, NShift);
}
+// Transform the following:
+// - add(x, abs(y)) -> saba(x, y, 0)
+// - add(x, zext(abs(y))) -> sabal(x, y, 0)
+static SDValue performAddSABACombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ if (N->getOpcode() != ISD::ADD)
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ if (!VT.isFixedLengthVector())
+ return SDValue();
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ auto MatchAbsOrZExtAbs = [](SDValue V0, SDValue V1, SDValue &AbsOp,
+ SDValue &Other, bool &IsZExt) {
+ Other = V1;
+ if (sd_match(V0, m_Abs(SDPatternMatch::m_Value(AbsOp)))) {
+ IsZExt = false;
+ return true;
+ }
+ if (sd_match(V0, SDPatternMatch::m_ZExt(
+ m_Abs(SDPatternMatch::m_Value(AbsOp))))) {
+ IsZExt = true;
+ return true;
+ }
+
+ return false;
+ };
+
+ SDValue AbsOp;
+ SDValue Other;
+ bool IsZExt;
+ if (!MatchAbsOrZExtAbs(N0, N1, AbsOp, Other, IsZExt) &&
+ !MatchAbsOrZExtAbs(N1, N0, AbsOp, Other, IsZExt))
+ return SDValue();
+
+ // Don't perform this on abs(sub), as this will become an ABD/ABA anyway.
+ if (AbsOp.getOpcode() == ISD::SUB)
+ return SDValue();
+
+ SDLoc DL(N);
+ SDValue Zero = DCI.DAG.getConstant(0, DL, MVT::i64);
+ SDValue Zeros = DCI.DAG.getSplatVector(AbsOp.getValueType(), DL, Zero);
+
+ unsigned Opcode = IsZExt ? AArch64ISD::SABAL : AArch64ISD::SABA;
+ return DCI.DAG.getNode(Opcode, DL, VT, Other, AbsOp, Zeros);
+}
+
static SDValue performAddSubCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
// Try to change sum of two reductions.
@@ -21939,6 +21990,9 @@ static SDValue performAddSubCombine(SDNode *N,
if (SDValue Val = performExtBinopLoadFold(N, DCI.DAG))
return Val;
+ if (SDValue Val = performAddSABACombine(N, DCI))
+ return Val;
+
return performAddSubLongCombine(N, DCI);
}
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 62b26b5239365..fdfde5ea1dc37 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -1059,6 +1059,10 @@ def AArch64sdot : SDNode<"AArch64ISD::SDOT", SDT_AArch64Dot>;
def AArch64udot : SDNode<"AArch64ISD::UDOT", SDT_AArch64Dot>;
def AArch64usdot : SDNode<"AArch64ISD::USDOT", SDT_AArch64Dot>;
+// saba/sabal
+def AArch64neonsaba : SDNode<"AArch64ISD::SABA", SDT_AArch64trivec>;
+def AArch64neonsabal : SDNode<"AArch64ISD::SABAL", SDT_AArch64Dot>;
+
// Vector across-lanes addition
// Only the lower result lane is defined.
def AArch64saddv : SDNode<"AArch64ISD::SADDV", SDT_AArch64UnaryVec>;
@@ -6121,6 +6125,19 @@ defm SQRDMLAH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10000,"sqrdmlah",
defm SQRDMLSH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10001,"sqrdmlsh",
int_aarch64_neon_sqrdmlsh>;
+def : Pat<(AArch64neonsaba (v8i8 V64:$Rd), V64:$Rn, V64:$Rm),
+ (SABAv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
+def : Pat<(AArch64neonsaba (v4i16 V64:$Rd), V64:$Rn, V64:$Rm),
+ (SABAv4i16 V64:$Rd, V64:$Rn, V64:$Rm)>;
+def : Pat<(AArch64neonsaba (v2i32 V64:$Rd), V64:$Rn, V64:$Rm),
+ (SABAv2i32 V64:$Rd, V64:$Rn, V64:$Rm)>;
+def : Pat<(AArch64neonsaba (v16i8 V128:$Rd), V128:$Rn, V128:$Rm),
+ (SABAv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
+def : Pat<(AArch64neonsaba (v8i16 V128:$Rd), V128:$Rn, V128:$Rm),
+ (SABAv8i16 V128:$Rd, V128:$Rn, V128:$Rm)>;
+def : Pat<(AArch64neonsaba (v4i32 V128:$Rd), V128:$Rn, V128:$Rm),
+ (SABAv4i32 V128:$Rd, V128:$Rn, V128:$Rm)>;
+
defm AND : SIMDLogicalThreeVector<0, 0b00, "and", and>;
defm BIC : SIMDLogicalThreeVector<0, 0b01, "bic",
BinOpFrag<(and node:$LHS, (vnot node:$RHS))> >;
@@ -7008,6 +7025,14 @@ defm : AddSubHNPatterns<ADDHNv2i64_v2i32, ADDHNv2i64_v4i32,
SUBHNv2i64_v2i32, SUBHNv2i64_v4i32,
v2i32, v2i64, 32>;
+// Patterns for SABAL
+def : Pat<(AArch64neonsabal (v8i16 V128:$Rd), (v8i8 V64:$Rn), (v8i8 V64:$Rm)),
+ (SABALv8i8_v8i16 V128:$Rd, V64:$Rn, V64:$Rm)>;
+def : Pat<(AArch64neonsabal (v4i32 V128:$Rd), (v4i16 V64:$Rn), (v4i16 V64:$Rm)),
+ (SABALv4i16_v4i32 V128:$Rd, V64:$Rn, V64:$Rm)>;
+def : Pat<(AArch64neonsabal (v2i64 V128:$Rd), (v2i32 V64:$Rn), (v2i32 V64:$Rm)),
+ (SABALv2i32_v2i64 V128:$Rd, V64:$Rn, V64:$Rm)>;
+
//----------------------------------------------------------------------------
// AdvSIMD bitwise extract from vector instruction.
//----------------------------------------------------------------------------
diff --git a/llvm/test/CodeGen/AArch64/neon-saba.ll b/llvm/test/CodeGen/AArch64/neon-saba.ll
index 2cb5053e07d6a..c8de6b21e9764 100644
--- a/llvm/test/CodeGen/AArch64/neon-saba.ll
+++ b/llvm/test/CodeGen/AArch64/neon-saba.ll
@@ -177,168 +177,168 @@ define <8 x i8> @saba_sabd_8b(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 {
; SABA from ADD(SABD(X, ZEROS))
define <4 x i32> @saba_sabd_zeros_4s(<4 x i32> %a, <4 x i32> %b) #0 {
-; CHECK-SD-LABEL: saba_sabd_zeros_4s:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: abs v1.4s, v1.4s
-; CHECK-SD-NEXT: add v0.4s, v1.4s, v0.4s
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: saba_sabd_zeros_4s:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
-; CHECK-GI-NEXT: saba v0.4s, v1.4s, v2.4s
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: saba_sabd_zeros_4s:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v2.2d, #0000000000000000
+; CHECK-NEXT: saba v0.4s, v1.4s, v2.4s
+; CHECK-NEXT: ret
%sabd = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %b, <4 x i32> zeroinitializer)
%add = add <4 x i32> %sabd, %a
ret <4 x i32> %add
}
define <2 x i32> @saba_sabd_zeros_2s(<2 x i32> %a, <2 x i32> %b) #0 {
-; CHECK-SD-LABEL: saba_sabd_zeros_2s:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: abs v1.2s, v1.2s
-; CHECK-SD-NEXT: add v0.2s, v1.2s, v0.2s
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: saba_sabd_zeros_2s:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
-; CHECK-GI-NEXT: saba v0.2s, v1.2s, v2.2s
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: saba_sabd_zeros_2s:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v2.2d, #0000000000000000
+; CHECK-NEXT: saba v0.2s, v1.2s, v2.2s
+; CHECK-NEXT: ret
%sabd = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %b, <2 x i32> zeroinitializer)
%add = add <2 x i32> %sabd, %a
ret <2 x i32> %add
}
define <8 x i16> @saba_sabd_zeros_8h(<8 x i16> %a, <8 x i16> %b) #0 {
-; CHECK-SD-LABEL: saba_sabd_zeros_8h:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: abs v1.8h, v1.8h
-; CHECK-SD-NEXT: add v0.8h, v1.8h, v0.8h
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: saba_sabd_zeros_8h:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
-; CHECK-GI-NEXT: saba v0.8h, v1.8h, v2.8h
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: saba_sabd_zeros_8h:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v2.2d, #0000000000000000
+; CHECK-NEXT: saba v0.8h, v1.8h, v2.8h
+; CHECK-NEXT: ret
%sabd = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %b, <8 x i16> zeroinitializer)
%add = add <8 x i16> %sabd, %a
ret <8 x i16> %add
}
define <4 x i16> @saba_sabd_zeros_4h(<4 x i16> %a, <4 x i16> %b) #0 {
-; CHECK-SD-LABEL: saba_sabd_zeros_4h:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: abs v1.4h, v1.4h
-; CHECK-SD-NEXT: add v0.4h, v1.4h, v0.4h
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: saba_sabd_zeros_4h:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
-; CHECK-GI-NEXT: saba v0.4h, v1.4h, v2.4h
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: saba_sabd_zeros_4h:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v2.2d, #0000000000000000
+; CHECK-NEXT: saba v0.4h, v1.4h, v2.4h
+; CHECK-NEXT: ret
%sabd = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %b, <4 x i16> zeroinitializer)
%add = add <4 x i16> %sabd, %a
ret <4 x i16> %add
}
define <16 x i8> @saba_sabd_zeros_16b(<16 x i8> %a, <16 x i8> %b) #0 {
-; CHECK-SD-LABEL: saba_sabd_zeros_16b:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: abs v1.16b, v1.16b
-; CHECK-SD-NEXT: add v0.16b, v1.16b, v0.16b
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: saba_sabd_zeros_16b:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
-; CHECK-GI-NEXT: saba v0.16b, v1.16b, v2.16b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: saba_sabd_zeros_16b:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v2.2d, #0000000000000000
+; CHECK-NEXT: saba v0.16b, v1.16b, v2.16b
+; CHECK-NEXT: ret
%sabd = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %b, <16 x i8> zeroinitializer)
%add = add <16 x i8> %sabd, %a
ret <16 x i8> %add
}
define <8 x i8> @saba_sabd_zeros_8b(<8 x i8> %a, <8 x i8> %b) #0 {
-; CHECK-SD-LABEL: saba_sabd_zeros_8b:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: abs v1.8b, v1.8b
-; CHECK-SD-NEXT: add v0.8b, v1.8b, v0.8b
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: saba_sabd_zeros_8b:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
-; CHECK-GI-NEXT: saba v0.8b, v1.8b, v2.8b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: saba_sabd_zeros_8b:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v2.2d, #0000000000000000
+; CHECK-NEXT: saba v0.8b, v1.8b, v2.8b
+; CHECK-NEXT: ret
%sabd = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %b, <8 x i8> zeroinitializer)
%add = add <8 x i8> %sabd, %a
ret <8 x i8> %add
}
define <4 x i32> @saba_abs_zeros_4s(<4 x i32> %a, <4 x i32> %b) #0 {
-; CHECK-LABEL: saba_abs_zeros_4s:
-; CHECK: // %bb.0:
-; CHECK-NEXT: abs v1.4s, v1.4s
-; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: saba_abs_zeros_4s:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v2.2d, #0000000000000000
+; CHECK-SD-NEXT: saba v0.4s, v1.4s, v2.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: saba_abs_zeros_4s:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: abs v1.4s, v1.4s
+; CHECK-GI-NEXT: add v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT: ret
%abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %b, i1 true)
%add = add <4 x i32> %a, %abs
ret <4 x i32> %add
}
define <2 x i32> @saba_abs_zeros_2s(<2 x i32> %a, <2 x i32> %b) #0 {
-; CHECK-LABEL: saba_abs_zeros_2s:
-; CHECK: // %bb.0:
-; CHECK-NEXT: abs v1.2s, v1.2s
-; CHECK-NEXT: add v0.2s, v0.2s, v1.2s
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: saba_abs_zeros_2s:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v2.2d, #0000000000000000
+; CHECK-SD-NEXT: saba v0.2s, v1.2s, v2.2s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: saba_abs_zeros_2s:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: abs v1.2s, v1.2s
+; CHECK-GI-NEXT: add v0.2s, v0.2s, v1.2s
+; CHECK-GI-NEXT: ret
%abs = call <2 x i32> @llvm.abs.v2i32(<2 x i32> %b, i1 true)
%add = add <2 x i32> %a, %abs
ret <2 x i32> %add
}
define <8 x i16> @saba_abs_zeros_8h(<8 x i16> %a, <8 x i16> %b) #0 {
-; CHECK-LABEL: saba_abs_zeros_8h:
-; CHECK: // %bb.0:
-; CHECK-NEXT: abs v1.8h, v1.8h
-; CHECK-NEXT: add v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: saba_abs_zeros_8h:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v2.2d, #0000000000000000
+; CHECK-SD-NEXT: saba v0.8h, v1.8h, v2.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: saba_abs_zeros_8h:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: abs v1.8h, v1.8h
+; CHECK-GI-NEXT: add v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: ret
%abs = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %b, i1 true)
%add = add <8 x i16> %a, %abs
ret <8 x i16> %add
}
define <4 x i16> @saba_abs_zeros_4h(<4 x i16> %a, <4 x i16> %b) #0 {
-; CHECK-LABEL: saba_abs_zeros_4h:
-; CHECK: // %bb.0:
-; CHECK-NEXT: abs v1.4h, v1.4h
-; CHECK-NEXT: add v0.4h, v0.4h, v1.4h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: saba_abs_zeros_4h:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v2.2d, #0000000000000000
+; CHECK-SD-NEXT: saba v0.4h, v1.4h, v2.4h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: saba_abs_zeros_4h:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: abs v1.4h, v1.4h
+; CHECK-GI-NEXT: add v0.4h, v0.4h, v1.4h
+; CHECK-GI-NEXT: ret
%abs = call <4 x i16> @llvm.abs.v4i16(<4 x i16> %b, i1 true)
%add = add <4 x i16> %a, %abs
ret <4 x i16> %add
}
define <16 x i8> @saba_abs_zeros_16b(<16 x i8> %a, <16 x i8> %b) #0 {
-; CHECK-LABEL: saba_abs_zeros_16b:
-; CHECK: // %bb.0:
-; CHECK-NEXT: abs v1.16b, v1.16b
-; CHECK-NEXT: add v0.16b, v0.16b, v1.16b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: saba_abs_zeros_16b:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v2.2d, #0000000000000000
+; CHECK-SD-NEXT: saba v0.16b, v1.16b, v2.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: saba_abs_zeros_16b:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: abs v1.16b, v1.16b
+; CHECK-GI-NEXT: add v0.16b, v0.16b, v1.16b
+; CHECK-GI-NEXT: ret
%abs = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %b, i1 true)
%add = add <16 x i8> %a, %abs
ret <16 x i8> %add
}
define <8 x i8> @saba_abs_zeros_8b(<8 x i8> %a, <8 x i8> %b) #0 {
-; CHECK-LABEL: saba_abs_zeros_8b:
-; CHECK: // %bb.0:
-; CHECK-NEXT: abs v1.8b, v1.8b
-; CHECK-NEXT: add v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: saba_abs_zeros_8b:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v2.2d, #0000000000000000
+; CHECK-SD-NEXT: saba v0.8b, v1.8b, v2.8b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: saba_abs_zeros_8b:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: abs v1.8b, v1.8b
+; CHECK-GI-NEXT: add v0.8b, v0.8b, v1.8b
+; CHECK-GI-NEXT: ret
%abs = call <8 x i8> @llvm.abs.v8i8(<8 x i8> %b, i1 true)
%add = add <8 x i8> %a, %abs
ret <8 x i8> %add
@@ -347,65 +347,53 @@ define <8 x i8> @saba_abs_zeros_8b(<8 x i8> %a, <8 x i8> %b) #0 {
; SABAL from ADD(ZEXT(SABD(X, ZEROS)))
define <2 x i64> @sabal_sabd_zeros_2s(<2 x i64> %a, <2 x i32> %b) #0 {
-; CHECK-SD-LABEL: sabal_sabd_zeros_2s:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: abs v1.2s, v1.2s
-; CHECK-SD-NEXT: uaddw v0.2d, v0.2d, v1.2s
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: sabal_sabd_zeros_2s:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
-; CHECK-GI-NEXT: sabal v0.2d, v1.2s, v2.2s
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: sabal_sabd_zeros_2s:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v2.2d, #0000000000000000
+; CHECK-NEXT: sabal v0.2d, v1.2s, v2.2s
+; CHECK-NEXT: ret
%sabd = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %b, <2 x i32> zeroinitializer)
%sabd.zext = zext <2 x i32> %sabd to <2 x i64>
- %add = add <2 x i64> %sabd.zext, %a
+ %add = add <2 x i64> %a, %sabd.zext
ret <2 x i64> %add
}
define <4 x i32> @sabal_sabd_zeros_4h(<4 x i32> %a, <4 x i16> %b) #0 {
-; CHECK-SD-LABEL: sabal_sabd_zeros_4h:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: abs v1.4h, v1.4h
-; CHECK-SD-NEXT: uaddw v0.4s, v0.4s, v1.4h
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: sabal_sabd_zeros_4h:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
-; CHECK-GI-NEXT: sabal v0.4s, v1.4h, v2.4h
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: sabal_sabd_zeros_4h:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v2.2d, #0000000000000000
+; CHECK-NEXT: sabal v0.4s, v1.4h, v2.4h
+; CHECK-NEXT: ret
%sabd = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %b, <4 x i16> zeroinitializer)
%sabd.zext = zext <4 x i16> %sabd to <4 x i32>
- %add = add <4 x i32> %sabd.zext, %a
+ %add = add <4 x i32> %a, %sabd.zext
ret <4 x i32> %add
}
define <8 x i16> @sabal_sabd_zeros_8b(<8 x i16> %a, <8 x i8> %b) #0 {
-; CHECK-SD-LABEL: sabal_sabd_zeros_8b:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: abs v1.8b, v1.8b
-; CHECK-SD-NEXT: uaddw v0.8h, v0.8h, v1.8b
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: sabal_sabd_zeros_8b:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
-; CHECK-GI-NEXT: sabal v0.8h, v1.8b, v2.8b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: sabal_sabd_zeros_8b:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v2.2d, #0000000000000000
+; CHECK-NEXT: sabal v0.8h, v1.8b, v2.8b
+; CHECK-NEXT: ret
%sabd = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %b, <8 x i8> zeroinitializer)
%sabd.zext = zext <8 x i8> %sabd to <8 x i16>
- %add = add <8 x i16> %sabd.zext, %a
+ %add = add <8 x i16> %a, %sabd.zext
ret <8 x i16> %add
}
define <2 x i64> @sabal_abs_zeros_2s(<2 x i64> %a, <2 x i32> %b) #0 {
-; CHECK-LABEL: sabal_abs_zeros_2s:
-; CHECK: // %bb.0:
-; CHECK-NEXT: abs v1.2s, v1.2s
-; CHECK-NEXT: uaddw v0.2d, v0.2d, v1.2s
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: sabal_abs_zeros_2s:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v2.2d, #0000000000000000
+; CHECK-SD-NEXT: sabal v0.2d, v1.2s, v2.2s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: sabal_abs_zeros_2s:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: abs v1.2s, v1.2s
+; CHECK-GI-NEXT: uaddw v0.2d, v0.2d, v1.2s
+; CHECK-GI-NEXT: ret
%abs = call <2 x i32> @llvm.abs.v2i32(<2 x i32> %b, i1 true)
%abs.zext = zext <2 x i32> %abs to <2 x i64>
%add = add <2 x i64> %a, %abs.zext
@@ -413,11 +401,17 @@ define <2 x i64> @sabal_abs_zeros_2s(<2 x i64> %a, <2 x i32> %b) #0 {
}
define <4 x i32> @sabal_abs_zeros_4h(<4 x i32> %a, <4 x i16> %b) #0 {
-; CHECK-LABEL: sabal_abs_zeros_4h:
-; CHECK: // %bb.0:
-; CHECK-NEXT: abs v1.4h, v1.4h
-; CHECK-NEXT: uaddw v0.4s, v0.4s, v1.4h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: sabal_abs_zeros_4h:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v2.2d, #0000000000000000
+; CHECK-SD-NEXT: sabal v0.4s, v1.4h, v2.4h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: sabal_abs_zeros_4h:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: abs v1.4h, v1.4h
+; CHECK-GI-NEXT: uaddw v0.4s, v0.4s, v1.4h
+; CHECK-GI-NEXT: ret
%abs = call <4 x i16> @llvm.abs.v4i16(<4 x i16> %b, i1 true)
%abs.zext = zext <4 x i16> %abs to <4 x i32>
%add = add <4 x i32> %a, %abs.zext
@@ -425,11 +419,17 @@ define <4 x i32> @sabal_abs_zeros_4h(<4 x i32> %a, <4 x i16> %b) #0 {
}
define <8 x i16> @sabal_abs_zeros_8b(<8 x i16> %a, <8 x i8> %b) #0 {
-; CHECK-LABEL: sabal_abs_zeros_8b:
-; CHECK: // %bb.0:
-; CHECK-NEXT: abs v1.8b, v1.8b
-; CHECK-NEXT: uaddw v0.8h, v0.8h, v1.8b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: sabal_abs_zeros_8b:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v2.2d, #0000000000000000
+; CHECK-SD-NEXT: sabal v0.8h, v1.8b, v2.8b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: sabal_abs_zeros_8b:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: abs v1.8b, v1.8b
+; CHECK-GI-NEXT: uaddw v0.8h, v0.8h, v1.8b
+; CHECK-GI-NEXT: ret
%abs = call <8 x i8> @llvm.abs.v8i8(<8 x i8> %b, i1 true)
%abs.zext = zext <8 x i8> %abs to <8 x i16>
%add = add <8 x i16> %a, %abs.zext
>From 2ac2716d6f37722268f5b921020aa9739f52a027 Mon Sep 17 00:00:00 2001
From: Hari Limaye <hari.limaye at arm.com>
Date: Wed, 3 Sep 2025 22:29:41 +0000
Subject: [PATCH 3/4] Revert "[AArch64] Transform add(x, abs(y)) -> saba(x, y, 0)"
This reverts commit 8ac71cda207b4334f5b6ea2f4ec5d4e0fb3606d1.
---
.../Target/AArch64/AArch64ISelLowering.cpp | 54 ----
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 25 --
llvm/test/CodeGen/AArch64/neon-saba.ll | 294 +++++++++---------
3 files changed, 147 insertions(+), 226 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 1bbfcf9fe5206..d3515cf81f443 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -50,7 +50,6 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/SDPatternMatch.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetCallingConv.h"
@@ -21915,56 +21914,6 @@ static SDValue performExtBinopLoadFold(SDNode *N, SelectionDAG &DAG) {
return DAG.getNode(N->getOpcode(), DL, VT, Ext0, NShift);
}
-// Transform the following:
-// - add(x, abs(y)) -> saba(x, y, 0)
-// - add(x, zext(abs(y))) -> sabal(x, y, 0)
-static SDValue performAddSABACombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI) {
- if (N->getOpcode() != ISD::ADD)
- return SDValue();
-
- EVT VT = N->getValueType(0);
- if (!VT.isFixedLengthVector())
- return SDValue();
-
- SDValue N0 = N->getOperand(0);
- SDValue N1 = N->getOperand(1);
-
- auto MatchAbsOrZExtAbs = [](SDValue V0, SDValue V1, SDValue &AbsOp,
- SDValue &Other, bool &IsZExt) {
- Other = V1;
- if (sd_match(V0, m_Abs(SDPatternMatch::m_Value(AbsOp)))) {
- IsZExt = false;
- return true;
- }
- if (sd_match(V0, SDPatternMatch::m_ZExt(
- m_Abs(SDPatternMatch::m_Value(AbsOp))))) {
- IsZExt = true;
- return true;
- }
-
- return false;
- };
-
- SDValue AbsOp;
- SDValue Other;
- bool IsZExt;
- if (!MatchAbsOrZExtAbs(N0, N1, AbsOp, Other, IsZExt) &&
- !MatchAbsOrZExtAbs(N1, N0, AbsOp, Other, IsZExt))
- return SDValue();
-
- // Don't perform this on abs(sub), as this will become an ABD/ABA anyway.
- if (AbsOp.getOpcode() == ISD::SUB)
- return SDValue();
-
- SDLoc DL(N);
- SDValue Zero = DCI.DAG.getConstant(0, DL, MVT::i64);
- SDValue Zeros = DCI.DAG.getSplatVector(AbsOp.getValueType(), DL, Zero);
-
- unsigned Opcode = IsZExt ? AArch64ISD::SABAL : AArch64ISD::SABA;
- return DCI.DAG.getNode(Opcode, DL, VT, Other, AbsOp, Zeros);
-}
-
static SDValue performAddSubCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
// Try to change sum of two reductions.
@@ -21990,9 +21939,6 @@ static SDValue performAddSubCombine(SDNode *N,
if (SDValue Val = performExtBinopLoadFold(N, DCI.DAG))
return Val;
- if (SDValue Val = performAddSABACombine(N, DCI))
- return Val;
-
return performAddSubLongCombine(N, DCI);
}
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index fdfde5ea1dc37..62b26b5239365 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -1059,10 +1059,6 @@ def AArch64sdot : SDNode<"AArch64ISD::SDOT", SDT_AArch64Dot>;
def AArch64udot : SDNode<"AArch64ISD::UDOT", SDT_AArch64Dot>;
def AArch64usdot : SDNode<"AArch64ISD::USDOT", SDT_AArch64Dot>;
-// saba/sabal
-def AArch64neonsaba : SDNode<"AArch64ISD::SABA", SDT_AArch64trivec>;
-def AArch64neonsabal : SDNode<"AArch64ISD::SABAL", SDT_AArch64Dot>;
-
// Vector across-lanes addition
// Only the lower result lane is defined.
def AArch64saddv : SDNode<"AArch64ISD::SADDV", SDT_AArch64UnaryVec>;
@@ -6125,19 +6121,6 @@ defm SQRDMLAH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10000,"sqrdmlah",
defm SQRDMLSH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10001,"sqrdmlsh",
int_aarch64_neon_sqrdmlsh>;
-def : Pat<(AArch64neonsaba (v8i8 V64:$Rd), V64:$Rn, V64:$Rm),
- (SABAv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
-def : Pat<(AArch64neonsaba (v4i16 V64:$Rd), V64:$Rn, V64:$Rm),
- (SABAv4i16 V64:$Rd, V64:$Rn, V64:$Rm)>;
-def : Pat<(AArch64neonsaba (v2i32 V64:$Rd), V64:$Rn, V64:$Rm),
- (SABAv2i32 V64:$Rd, V64:$Rn, V64:$Rm)>;
-def : Pat<(AArch64neonsaba (v16i8 V128:$Rd), V128:$Rn, V128:$Rm),
- (SABAv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
-def : Pat<(AArch64neonsaba (v8i16 V128:$Rd), V128:$Rn, V128:$Rm),
- (SABAv8i16 V128:$Rd, V128:$Rn, V128:$Rm)>;
-def : Pat<(AArch64neonsaba (v4i32 V128:$Rd), V128:$Rn, V128:$Rm),
- (SABAv4i32 V128:$Rd, V128:$Rn, V128:$Rm)>;
-
defm AND : SIMDLogicalThreeVector<0, 0b00, "and", and>;
defm BIC : SIMDLogicalThreeVector<0, 0b01, "bic",
BinOpFrag<(and node:$LHS, (vnot node:$RHS))> >;
@@ -7025,14 +7008,6 @@ defm : AddSubHNPatterns<ADDHNv2i64_v2i32, ADDHNv2i64_v4i32,
SUBHNv2i64_v2i32, SUBHNv2i64_v4i32,
v2i32, v2i64, 32>;
-// Patterns for SABAL
-def : Pat<(AArch64neonsabal (v8i16 V128:$Rd), (v8i8 V64:$Rn), (v8i8 V64:$Rm)),
- (SABALv8i8_v8i16 V128:$Rd, V64:$Rn, V64:$Rm)>;
-def : Pat<(AArch64neonsabal (v4i32 V128:$Rd), (v4i16 V64:$Rn), (v4i16 V64:$Rm)),
- (SABALv4i16_v4i32 V128:$Rd, V64:$Rn, V64:$Rm)>;
-def : Pat<(AArch64neonsabal (v2i64 V128:$Rd), (v2i32 V64:$Rn), (v2i32 V64:$Rm)),
- (SABALv2i32_v2i64 V128:$Rd, V64:$Rn, V64:$Rm)>;
-
//----------------------------------------------------------------------------
// AdvSIMD bitwise extract from vector instruction.
//----------------------------------------------------------------------------
diff --git a/llvm/test/CodeGen/AArch64/neon-saba.ll b/llvm/test/CodeGen/AArch64/neon-saba.ll
index c8de6b21e9764..2cb5053e07d6a 100644
--- a/llvm/test/CodeGen/AArch64/neon-saba.ll
+++ b/llvm/test/CodeGen/AArch64/neon-saba.ll
@@ -177,168 +177,168 @@ define <8 x i8> @saba_sabd_8b(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 {
; SABA from ADD(SABD(X, ZEROS))
define <4 x i32> @saba_sabd_zeros_4s(<4 x i32> %a, <4 x i32> %b) #0 {
-; CHECK-LABEL: saba_sabd_zeros_4s:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v2.2d, #0000000000000000
-; CHECK-NEXT: saba v0.4s, v1.4s, v2.4s
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: saba_sabd_zeros_4s:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: abs v1.4s, v1.4s
+; CHECK-SD-NEXT: add v0.4s, v1.4s, v0.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: saba_sabd_zeros_4s:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
+; CHECK-GI-NEXT: saba v0.4s, v1.4s, v2.4s
+; CHECK-GI-NEXT: ret
%sabd = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %b, <4 x i32> zeroinitializer)
%add = add <4 x i32> %sabd, %a
ret <4 x i32> %add
}
define <2 x i32> @saba_sabd_zeros_2s(<2 x i32> %a, <2 x i32> %b) #0 {
-; CHECK-LABEL: saba_sabd_zeros_2s:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v2.2d, #0000000000000000
-; CHECK-NEXT: saba v0.2s, v1.2s, v2.2s
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: saba_sabd_zeros_2s:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: abs v1.2s, v1.2s
+; CHECK-SD-NEXT: add v0.2s, v1.2s, v0.2s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: saba_sabd_zeros_2s:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
+; CHECK-GI-NEXT: saba v0.2s, v1.2s, v2.2s
+; CHECK-GI-NEXT: ret
%sabd = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %b, <2 x i32> zeroinitializer)
%add = add <2 x i32> %sabd, %a
ret <2 x i32> %add
}
define <8 x i16> @saba_sabd_zeros_8h(<8 x i16> %a, <8 x i16> %b) #0 {
-; CHECK-LABEL: saba_sabd_zeros_8h:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v2.2d, #0000000000000000
-; CHECK-NEXT: saba v0.8h, v1.8h, v2.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: saba_sabd_zeros_8h:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: abs v1.8h, v1.8h
+; CHECK-SD-NEXT: add v0.8h, v1.8h, v0.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: saba_sabd_zeros_8h:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
+; CHECK-GI-NEXT: saba v0.8h, v1.8h, v2.8h
+; CHECK-GI-NEXT: ret
%sabd = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %b, <8 x i16> zeroinitializer)
%add = add <8 x i16> %sabd, %a
ret <8 x i16> %add
}
define <4 x i16> @saba_sabd_zeros_4h(<4 x i16> %a, <4 x i16> %b) #0 {
-; CHECK-LABEL: saba_sabd_zeros_4h:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v2.2d, #0000000000000000
-; CHECK-NEXT: saba v0.4h, v1.4h, v2.4h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: saba_sabd_zeros_4h:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: abs v1.4h, v1.4h
+; CHECK-SD-NEXT: add v0.4h, v1.4h, v0.4h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: saba_sabd_zeros_4h:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
+; CHECK-GI-NEXT: saba v0.4h, v1.4h, v2.4h
+; CHECK-GI-NEXT: ret
%sabd = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %b, <4 x i16> zeroinitializer)
%add = add <4 x i16> %sabd, %a
ret <4 x i16> %add
}
define <16 x i8> @saba_sabd_zeros_16b(<16 x i8> %a, <16 x i8> %b) #0 {
-; CHECK-LABEL: saba_sabd_zeros_16b:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v2.2d, #0000000000000000
-; CHECK-NEXT: saba v0.16b, v1.16b, v2.16b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: saba_sabd_zeros_16b:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: abs v1.16b, v1.16b
+; CHECK-SD-NEXT: add v0.16b, v1.16b, v0.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: saba_sabd_zeros_16b:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
+; CHECK-GI-NEXT: saba v0.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT: ret
%sabd = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %b, <16 x i8> zeroinitializer)
%add = add <16 x i8> %sabd, %a
ret <16 x i8> %add
}
define <8 x i8> @saba_sabd_zeros_8b(<8 x i8> %a, <8 x i8> %b) #0 {
-; CHECK-LABEL: saba_sabd_zeros_8b:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v2.2d, #0000000000000000
-; CHECK-NEXT: saba v0.8b, v1.8b, v2.8b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: saba_sabd_zeros_8b:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: abs v1.8b, v1.8b
+; CHECK-SD-NEXT: add v0.8b, v1.8b, v0.8b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: saba_sabd_zeros_8b:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
+; CHECK-GI-NEXT: saba v0.8b, v1.8b, v2.8b
+; CHECK-GI-NEXT: ret
%sabd = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %b, <8 x i8> zeroinitializer)
%add = add <8 x i8> %sabd, %a
ret <8 x i8> %add
}
define <4 x i32> @saba_abs_zeros_4s(<4 x i32> %a, <4 x i32> %b) #0 {
-; CHECK-SD-LABEL: saba_abs_zeros_4s:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: movi v2.2d, #0000000000000000
-; CHECK-SD-NEXT: saba v0.4s, v1.4s, v2.4s
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: saba_abs_zeros_4s:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: abs v1.4s, v1.4s
-; CHECK-GI-NEXT: add v0.4s, v0.4s, v1.4s
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: saba_abs_zeros_4s:
+; CHECK: // %bb.0:
+; CHECK-NEXT: abs v1.4s, v1.4s
+; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: ret
%abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %b, i1 true)
%add = add <4 x i32> %a, %abs
ret <4 x i32> %add
}
define <2 x i32> @saba_abs_zeros_2s(<2 x i32> %a, <2 x i32> %b) #0 {
-; CHECK-SD-LABEL: saba_abs_zeros_2s:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: movi v2.2d, #0000000000000000
-; CHECK-SD-NEXT: saba v0.2s, v1.2s, v2.2s
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: saba_abs_zeros_2s:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: abs v1.2s, v1.2s
-; CHECK-GI-NEXT: add v0.2s, v0.2s, v1.2s
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: saba_abs_zeros_2s:
+; CHECK: // %bb.0:
+; CHECK-NEXT: abs v1.2s, v1.2s
+; CHECK-NEXT: add v0.2s, v0.2s, v1.2s
+; CHECK-NEXT: ret
%abs = call <2 x i32> @llvm.abs.v2i32(<2 x i32> %b, i1 true)
%add = add <2 x i32> %a, %abs
ret <2 x i32> %add
}
define <8 x i16> @saba_abs_zeros_8h(<8 x i16> %a, <8 x i16> %b) #0 {
-; CHECK-SD-LABEL: saba_abs_zeros_8h:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: movi v2.2d, #0000000000000000
-; CHECK-SD-NEXT: saba v0.8h, v1.8h, v2.8h
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: saba_abs_zeros_8h:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: abs v1.8h, v1.8h
-; CHECK-GI-NEXT: add v0.8h, v0.8h, v1.8h
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: saba_abs_zeros_8h:
+; CHECK: // %bb.0:
+; CHECK-NEXT: abs v1.8h, v1.8h
+; CHECK-NEXT: add v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: ret
%abs = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %b, i1 true)
%add = add <8 x i16> %a, %abs
ret <8 x i16> %add
}
define <4 x i16> @saba_abs_zeros_4h(<4 x i16> %a, <4 x i16> %b) #0 {
-; CHECK-SD-LABEL: saba_abs_zeros_4h:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: movi v2.2d, #0000000000000000
-; CHECK-SD-NEXT: saba v0.4h, v1.4h, v2.4h
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: saba_abs_zeros_4h:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: abs v1.4h, v1.4h
-; CHECK-GI-NEXT: add v0.4h, v0.4h, v1.4h
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: saba_abs_zeros_4h:
+; CHECK: // %bb.0:
+; CHECK-NEXT: abs v1.4h, v1.4h
+; CHECK-NEXT: add v0.4h, v0.4h, v1.4h
+; CHECK-NEXT: ret
%abs = call <4 x i16> @llvm.abs.v4i16(<4 x i16> %b, i1 true)
%add = add <4 x i16> %a, %abs
ret <4 x i16> %add
}
define <16 x i8> @saba_abs_zeros_16b(<16 x i8> %a, <16 x i8> %b) #0 {
-; CHECK-SD-LABEL: saba_abs_zeros_16b:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: movi v2.2d, #0000000000000000
-; CHECK-SD-NEXT: saba v0.16b, v1.16b, v2.16b
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: saba_abs_zeros_16b:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: abs v1.16b, v1.16b
-; CHECK-GI-NEXT: add v0.16b, v0.16b, v1.16b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: saba_abs_zeros_16b:
+; CHECK: // %bb.0:
+; CHECK-NEXT: abs v1.16b, v1.16b
+; CHECK-NEXT: add v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: ret
%abs = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %b, i1 true)
%add = add <16 x i8> %a, %abs
ret <16 x i8> %add
}
define <8 x i8> @saba_abs_zeros_8b(<8 x i8> %a, <8 x i8> %b) #0 {
-; CHECK-SD-LABEL: saba_abs_zeros_8b:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: movi v2.2d, #0000000000000000
-; CHECK-SD-NEXT: saba v0.8b, v1.8b, v2.8b
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: saba_abs_zeros_8b:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: abs v1.8b, v1.8b
-; CHECK-GI-NEXT: add v0.8b, v0.8b, v1.8b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: saba_abs_zeros_8b:
+; CHECK: // %bb.0:
+; CHECK-NEXT: abs v1.8b, v1.8b
+; CHECK-NEXT: add v0.8b, v0.8b, v1.8b
+; CHECK-NEXT: ret
%abs = call <8 x i8> @llvm.abs.v8i8(<8 x i8> %b, i1 true)
%add = add <8 x i8> %a, %abs
ret <8 x i8> %add
@@ -347,53 +347,65 @@ define <8 x i8> @saba_abs_zeros_8b(<8 x i8> %a, <8 x i8> %b) #0 {
; SABAL from ADD(ZEXT(SABD(X, ZEROS)))
define <2 x i64> @sabal_sabd_zeros_2s(<2 x i64> %a, <2 x i32> %b) #0 {
-; CHECK-LABEL: sabal_sabd_zeros_2s:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v2.2d, #0000000000000000
-; CHECK-NEXT: sabal v0.2d, v1.2s, v2.2s
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: sabal_sabd_zeros_2s:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: abs v1.2s, v1.2s
+; CHECK-SD-NEXT: uaddw v0.2d, v0.2d, v1.2s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: sabal_sabd_zeros_2s:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
+; CHECK-GI-NEXT: sabal v0.2d, v1.2s, v2.2s
+; CHECK-GI-NEXT: ret
%sabd = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %b, <2 x i32> zeroinitializer)
%sabd.zext = zext <2 x i32> %sabd to <2 x i64>
- %add = add <2 x i64> %a, %sabd.zext
+ %add = add <2 x i64> %sabd.zext, %a
ret <2 x i64> %add
}
define <4 x i32> @sabal_sabd_zeros_4h(<4 x i32> %a, <4 x i16> %b) #0 {
-; CHECK-LABEL: sabal_sabd_zeros_4h:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v2.2d, #0000000000000000
-; CHECK-NEXT: sabal v0.4s, v1.4h, v2.4h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: sabal_sabd_zeros_4h:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: abs v1.4h, v1.4h
+; CHECK-SD-NEXT: uaddw v0.4s, v0.4s, v1.4h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: sabal_sabd_zeros_4h:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
+; CHECK-GI-NEXT: sabal v0.4s, v1.4h, v2.4h
+; CHECK-GI-NEXT: ret
%sabd = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %b, <4 x i16> zeroinitializer)
%sabd.zext = zext <4 x i16> %sabd to <4 x i32>
- %add = add <4 x i32> %a, %sabd.zext
+ %add = add <4 x i32> %sabd.zext, %a
ret <4 x i32> %add
}
define <8 x i16> @sabal_sabd_zeros_8b(<8 x i16> %a, <8 x i8> %b) #0 {
-; CHECK-LABEL: sabal_sabd_zeros_8b:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v2.2d, #0000000000000000
-; CHECK-NEXT: sabal v0.8h, v1.8b, v2.8b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: sabal_sabd_zeros_8b:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: abs v1.8b, v1.8b
+; CHECK-SD-NEXT: uaddw v0.8h, v0.8h, v1.8b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: sabal_sabd_zeros_8b:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
+; CHECK-GI-NEXT: sabal v0.8h, v1.8b, v2.8b
+; CHECK-GI-NEXT: ret
%sabd = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %b, <8 x i8> zeroinitializer)
%sabd.zext = zext <8 x i8> %sabd to <8 x i16>
- %add = add <8 x i16> %a, %sabd.zext
+ %add = add <8 x i16> %sabd.zext, %a
ret <8 x i16> %add
}
define <2 x i64> @sabal_abs_zeros_2s(<2 x i64> %a, <2 x i32> %b) #0 {
-; CHECK-SD-LABEL: sabal_abs_zeros_2s:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: movi v2.2d, #0000000000000000
-; CHECK-SD-NEXT: sabal v0.2d, v1.2s, v2.2s
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: sabal_abs_zeros_2s:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: abs v1.2s, v1.2s
-; CHECK-GI-NEXT: uaddw v0.2d, v0.2d, v1.2s
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: sabal_abs_zeros_2s:
+; CHECK: // %bb.0:
+; CHECK-NEXT: abs v1.2s, v1.2s
+; CHECK-NEXT: uaddw v0.2d, v0.2d, v1.2s
+; CHECK-NEXT: ret
%abs = call <2 x i32> @llvm.abs.v2i32(<2 x i32> %b, i1 true)
%abs.zext = zext <2 x i32> %abs to <2 x i64>
%add = add <2 x i64> %a, %abs.zext
@@ -401,17 +413,11 @@ define <2 x i64> @sabal_abs_zeros_2s(<2 x i64> %a, <2 x i32> %b) #0 {
}
define <4 x i32> @sabal_abs_zeros_4h(<4 x i32> %a, <4 x i16> %b) #0 {
-; CHECK-SD-LABEL: sabal_abs_zeros_4h:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: movi v2.2d, #0000000000000000
-; CHECK-SD-NEXT: sabal v0.4s, v1.4h, v2.4h
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: sabal_abs_zeros_4h:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: abs v1.4h, v1.4h
-; CHECK-GI-NEXT: uaddw v0.4s, v0.4s, v1.4h
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: sabal_abs_zeros_4h:
+; CHECK: // %bb.0:
+; CHECK-NEXT: abs v1.4h, v1.4h
+; CHECK-NEXT: uaddw v0.4s, v0.4s, v1.4h
+; CHECK-NEXT: ret
%abs = call <4 x i16> @llvm.abs.v4i16(<4 x i16> %b, i1 true)
%abs.zext = zext <4 x i16> %abs to <4 x i32>
%add = add <4 x i32> %a, %abs.zext
@@ -419,17 +425,11 @@ define <4 x i32> @sabal_abs_zeros_4h(<4 x i32> %a, <4 x i16> %b) #0 {
}
define <8 x i16> @sabal_abs_zeros_8b(<8 x i16> %a, <8 x i8> %b) #0 {
-; CHECK-SD-LABEL: sabal_abs_zeros_8b:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: movi v2.2d, #0000000000000000
-; CHECK-SD-NEXT: sabal v0.8h, v1.8b, v2.8b
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: sabal_abs_zeros_8b:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: abs v1.8b, v1.8b
-; CHECK-GI-NEXT: uaddw v0.8h, v0.8h, v1.8b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: sabal_abs_zeros_8b:
+; CHECK: // %bb.0:
+; CHECK-NEXT: abs v1.8b, v1.8b
+; CHECK-NEXT: uaddw v0.8h, v0.8h, v1.8b
+; CHECK-NEXT: ret
%abs = call <8 x i8> @llvm.abs.v8i8(<8 x i8> %b, i1 true)
%abs.zext = zext <8 x i8> %abs to <8 x i16>
%add = add <8 x i16> %a, %abs.zext
>From c3249de00177e43302c2149a189a4906e91d61ed Mon Sep 17 00:00:00 2001
From: Hari Limaye <hari.limaye at arm.com>
Date: Wed, 3 Sep 2025 22:50:10 +0000
Subject: [PATCH 4/4] Do it in tblgen instead
---
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 23 +++
llvm/test/CodeGen/AArch64/neon-saba.ll | 204 +++++++-------------
llvm/test/CodeGen/AArch64/vecreduce-add.ll | 139 +++++++------
3 files changed, 164 insertions(+), 202 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 62b26b5239365..04b67cb3103a4 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -8303,6 +8303,29 @@ def MVNIv4s_msl : SIMDModifiedImmMoveMSL<1, 1, {1,1,0,?}, V128, "mvni", ".4s",
(AArch64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
}
+// SABA patterns for add(x, abs(y)) -> saba(x, y, 0)
+def : Pat<(v8i8 (add V64:$Vn, (abs V64:$Vm))),
+ (SABAv8i8 V64:$Vn, V64:$Vm, (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub))>;
+def : Pat<(v4i16 (add V64:$Vn, (abs V64:$Vm))),
+ (SABAv4i16 V64:$Vn, V64:$Vm, (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub))>;
+def : Pat<(v2i32 (add V64:$Vn, (abs V64:$Vm))),
+ (SABAv2i32 V64:$Vn, V64:$Vm, (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub))>;
+def : Pat<(v16i8 (add V128:$Vn, (abs V128:$Vm))),
+ (SABAv16i8 V128:$Vn, V128:$Vm, (MOVIv2d_ns (i32 0)))>;
+def : Pat<(v8i16 (add V128:$Vn, (abs V128:$Vm))),
+ (SABAv8i16 V128:$Vn, V128:$Vm, (MOVIv2d_ns (i32 0)))>;
+def : Pat<(v4i32 (add V128:$Vn, (abs V128:$Vm))),
+ (SABAv4i32 V128:$Vn, V128:$Vm, (MOVIv2d_ns (i32 0)))>;
+
+// SABAL patterns for add(x, zext(abs(y))) -> sabal(x, y, 0)
+def : Pat<(v8i16 (add V128:$Vn, (zext (abs (v8i8 V64:$Vm))))),
+ (SABALv8i8_v8i16 V128:$Vn, V64:$Vm, (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub))>;
+def : Pat<(v4i32 (add V128:$Vn, (zext (abs (v4i16 V64:$Vm))))),
+ (SABALv4i16_v4i32 V128:$Vn, V64:$Vm, (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub))>;
+def : Pat<(v2i64 (add V128:$Vn, (zext (abs (v2i32 V64:$Vm))))),
+ (SABALv2i32_v2i64 V128:$Vn, V64:$Vm, (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub))>;
+
+
//----------------------------------------------------------------------------
// AdvSIMD indexed element
//----------------------------------------------------------------------------
diff --git a/llvm/test/CodeGen/AArch64/neon-saba.ll b/llvm/test/CodeGen/AArch64/neon-saba.ll
index 2cb5053e07d6a..ddb85d6dee03c 100644
--- a/llvm/test/CodeGen/AArch64/neon-saba.ll
+++ b/llvm/test/CodeGen/AArch64/neon-saba.ll
@@ -12,9 +12,9 @@ define <4 x i32> @saba_abs_4s(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) #0 {
;
; CHECK-GI-LABEL: saba_abs_4s:
; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v3.2d, #0000000000000000
; CHECK-GI-NEXT: sub v1.4s, v1.4s, v2.4s
-; CHECK-GI-NEXT: abs v1.4s, v1.4s
-; CHECK-GI-NEXT: add v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT: saba v0.4s, v1.4s, v3.4s
; CHECK-GI-NEXT: ret
%sub = sub nsw <4 x i32> %b, %c
%abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %sub, i1 true)
@@ -30,9 +30,9 @@ define <2 x i32> @saba_abs_2s(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) #0 {
;
; CHECK-GI-LABEL: saba_abs_2s:
; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v3.2d, #0000000000000000
; CHECK-GI-NEXT: sub v1.2s, v1.2s, v2.2s
-; CHECK-GI-NEXT: abs v1.2s, v1.2s
-; CHECK-GI-NEXT: add v0.2s, v0.2s, v1.2s
+; CHECK-GI-NEXT: saba v0.2s, v1.2s, v3.2s
; CHECK-GI-NEXT: ret
%sub = sub nsw <2 x i32> %b, %c
%abs = call <2 x i32> @llvm.abs.v2i32(<2 x i32> %sub, i1 true)
@@ -48,9 +48,9 @@ define <8 x i16> @saba_abs_8h(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) #0 {
;
; CHECK-GI-LABEL: saba_abs_8h:
; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v3.2d, #0000000000000000
; CHECK-GI-NEXT: sub v1.8h, v1.8h, v2.8h
-; CHECK-GI-NEXT: abs v1.8h, v1.8h
-; CHECK-GI-NEXT: add v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: saba v0.8h, v1.8h, v3.8h
; CHECK-GI-NEXT: ret
%sub = sub nsw <8 x i16> %b, %c
%abs = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %sub, i1 true)
@@ -66,9 +66,9 @@ define <4 x i16> @saba_abs_4h(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #0 {
;
; CHECK-GI-LABEL: saba_abs_4h:
; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v3.2d, #0000000000000000
; CHECK-GI-NEXT: sub v1.4h, v1.4h, v2.4h
-; CHECK-GI-NEXT: abs v1.4h, v1.4h
-; CHECK-GI-NEXT: add v0.4h, v0.4h, v1.4h
+; CHECK-GI-NEXT: saba v0.4h, v1.4h, v3.4h
; CHECK-GI-NEXT: ret
%sub = sub nsw <4 x i16> %b, %c
%abs = call <4 x i16> @llvm.abs.v4i16(<4 x i16> %sub, i1 true)
@@ -84,9 +84,9 @@ define <16 x i8> @saba_abs_16b(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #0 {
;
; CHECK-GI-LABEL: saba_abs_16b:
; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v3.2d, #0000000000000000
; CHECK-GI-NEXT: sub v1.16b, v1.16b, v2.16b
-; CHECK-GI-NEXT: abs v1.16b, v1.16b
-; CHECK-GI-NEXT: add v0.16b, v0.16b, v1.16b
+; CHECK-GI-NEXT: saba v0.16b, v1.16b, v3.16b
; CHECK-GI-NEXT: ret
%sub = sub nsw <16 x i8> %b, %c
%abs = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %sub, i1 true)
@@ -102,9 +102,9 @@ define <8 x i8> @saba_abs_8b(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 {
;
; CHECK-GI-LABEL: saba_abs_8b:
; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v3.2d, #0000000000000000
; CHECK-GI-NEXT: sub v1.8b, v1.8b, v2.8b
-; CHECK-GI-NEXT: abs v1.8b, v1.8b
-; CHECK-GI-NEXT: add v0.8b, v0.8b, v1.8b
+; CHECK-GI-NEXT: saba v0.8b, v1.8b, v3.8b
; CHECK-GI-NEXT: ret
%sub = sub nsw <8 x i8> %b, %c
%abs = call <8 x i8> @llvm.abs.v8i8(<8 x i8> %sub, i1 true)
@@ -177,102 +177,66 @@ define <8 x i8> @saba_sabd_8b(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 {
; SABA from ADD(SABD(X, ZEROS))
define <4 x i32> @saba_sabd_zeros_4s(<4 x i32> %a, <4 x i32> %b) #0 {
-; CHECK-SD-LABEL: saba_sabd_zeros_4s:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: abs v1.4s, v1.4s
-; CHECK-SD-NEXT: add v0.4s, v1.4s, v0.4s
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: saba_sabd_zeros_4s:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
-; CHECK-GI-NEXT: saba v0.4s, v1.4s, v2.4s
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: saba_sabd_zeros_4s:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v2.2d, #0000000000000000
+; CHECK-NEXT: saba v0.4s, v1.4s, v2.4s
+; CHECK-NEXT: ret
%sabd = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %b, <4 x i32> zeroinitializer)
%add = add <4 x i32> %sabd, %a
ret <4 x i32> %add
}
define <2 x i32> @saba_sabd_zeros_2s(<2 x i32> %a, <2 x i32> %b) #0 {
-; CHECK-SD-LABEL: saba_sabd_zeros_2s:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: abs v1.2s, v1.2s
-; CHECK-SD-NEXT: add v0.2s, v1.2s, v0.2s
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: saba_sabd_zeros_2s:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
-; CHECK-GI-NEXT: saba v0.2s, v1.2s, v2.2s
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: saba_sabd_zeros_2s:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v2.2d, #0000000000000000
+; CHECK-NEXT: saba v0.2s, v1.2s, v2.2s
+; CHECK-NEXT: ret
%sabd = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %b, <2 x i32> zeroinitializer)
%add = add <2 x i32> %sabd, %a
ret <2 x i32> %add
}
define <8 x i16> @saba_sabd_zeros_8h(<8 x i16> %a, <8 x i16> %b) #0 {
-; CHECK-SD-LABEL: saba_sabd_zeros_8h:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: abs v1.8h, v1.8h
-; CHECK-SD-NEXT: add v0.8h, v1.8h, v0.8h
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: saba_sabd_zeros_8h:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
-; CHECK-GI-NEXT: saba v0.8h, v1.8h, v2.8h
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: saba_sabd_zeros_8h:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v2.2d, #0000000000000000
+; CHECK-NEXT: saba v0.8h, v1.8h, v2.8h
+; CHECK-NEXT: ret
%sabd = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %b, <8 x i16> zeroinitializer)
%add = add <8 x i16> %sabd, %a
ret <8 x i16> %add
}
define <4 x i16> @saba_sabd_zeros_4h(<4 x i16> %a, <4 x i16> %b) #0 {
-; CHECK-SD-LABEL: saba_sabd_zeros_4h:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: abs v1.4h, v1.4h
-; CHECK-SD-NEXT: add v0.4h, v1.4h, v0.4h
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: saba_sabd_zeros_4h:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
-; CHECK-GI-NEXT: saba v0.4h, v1.4h, v2.4h
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: saba_sabd_zeros_4h:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v2.2d, #0000000000000000
+; CHECK-NEXT: saba v0.4h, v1.4h, v2.4h
+; CHECK-NEXT: ret
%sabd = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %b, <4 x i16> zeroinitializer)
%add = add <4 x i16> %sabd, %a
ret <4 x i16> %add
}
define <16 x i8> @saba_sabd_zeros_16b(<16 x i8> %a, <16 x i8> %b) #0 {
-; CHECK-SD-LABEL: saba_sabd_zeros_16b:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: abs v1.16b, v1.16b
-; CHECK-SD-NEXT: add v0.16b, v1.16b, v0.16b
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: saba_sabd_zeros_16b:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
-; CHECK-GI-NEXT: saba v0.16b, v1.16b, v2.16b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: saba_sabd_zeros_16b:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v2.2d, #0000000000000000
+; CHECK-NEXT: saba v0.16b, v1.16b, v2.16b
+; CHECK-NEXT: ret
%sabd = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %b, <16 x i8> zeroinitializer)
%add = add <16 x i8> %sabd, %a
ret <16 x i8> %add
}
define <8 x i8> @saba_sabd_zeros_8b(<8 x i8> %a, <8 x i8> %b) #0 {
-; CHECK-SD-LABEL: saba_sabd_zeros_8b:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: abs v1.8b, v1.8b
-; CHECK-SD-NEXT: add v0.8b, v1.8b, v0.8b
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: saba_sabd_zeros_8b:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
-; CHECK-GI-NEXT: saba v0.8b, v1.8b, v2.8b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: saba_sabd_zeros_8b:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v2.2d, #0000000000000000
+; CHECK-NEXT: saba v0.8b, v1.8b, v2.8b
+; CHECK-NEXT: ret
%sabd = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %b, <8 x i8> zeroinitializer)
%add = add <8 x i8> %sabd, %a
ret <8 x i8> %add
@@ -281,8 +245,8 @@ define <8 x i8> @saba_sabd_zeros_8b(<8 x i8> %a, <8 x i8> %b) #0 {
define <4 x i32> @saba_abs_zeros_4s(<4 x i32> %a, <4 x i32> %b) #0 {
; CHECK-LABEL: saba_abs_zeros_4s:
; CHECK: // %bb.0:
-; CHECK-NEXT: abs v1.4s, v1.4s
-; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: movi v2.2d, #0000000000000000
+; CHECK-NEXT: saba v0.4s, v1.4s, v2.4s
; CHECK-NEXT: ret
%abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %b, i1 true)
%add = add <4 x i32> %a, %abs
@@ -292,8 +256,8 @@ define <4 x i32> @saba_abs_zeros_4s(<4 x i32> %a, <4 x i32> %b) #0 {
define <2 x i32> @saba_abs_zeros_2s(<2 x i32> %a, <2 x i32> %b) #0 {
; CHECK-LABEL: saba_abs_zeros_2s:
; CHECK: // %bb.0:
-; CHECK-NEXT: abs v1.2s, v1.2s
-; CHECK-NEXT: add v0.2s, v0.2s, v1.2s
+; CHECK-NEXT: movi v2.2d, #0000000000000000
+; CHECK-NEXT: saba v0.2s, v1.2s, v2.2s
; CHECK-NEXT: ret
%abs = call <2 x i32> @llvm.abs.v2i32(<2 x i32> %b, i1 true)
%add = add <2 x i32> %a, %abs
@@ -303,8 +267,8 @@ define <2 x i32> @saba_abs_zeros_2s(<2 x i32> %a, <2 x i32> %b) #0 {
define <8 x i16> @saba_abs_zeros_8h(<8 x i16> %a, <8 x i16> %b) #0 {
; CHECK-LABEL: saba_abs_zeros_8h:
; CHECK: // %bb.0:
-; CHECK-NEXT: abs v1.8h, v1.8h
-; CHECK-NEXT: add v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: movi v2.2d, #0000000000000000
+; CHECK-NEXT: saba v0.8h, v1.8h, v2.8h
; CHECK-NEXT: ret
%abs = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %b, i1 true)
%add = add <8 x i16> %a, %abs
@@ -314,8 +278,8 @@ define <8 x i16> @saba_abs_zeros_8h(<8 x i16> %a, <8 x i16> %b) #0 {
define <4 x i16> @saba_abs_zeros_4h(<4 x i16> %a, <4 x i16> %b) #0 {
; CHECK-LABEL: saba_abs_zeros_4h:
; CHECK: // %bb.0:
-; CHECK-NEXT: abs v1.4h, v1.4h
-; CHECK-NEXT: add v0.4h, v0.4h, v1.4h
+; CHECK-NEXT: movi v2.2d, #0000000000000000
+; CHECK-NEXT: saba v0.4h, v1.4h, v2.4h
; CHECK-NEXT: ret
%abs = call <4 x i16> @llvm.abs.v4i16(<4 x i16> %b, i1 true)
%add = add <4 x i16> %a, %abs
@@ -325,8 +289,8 @@ define <4 x i16> @saba_abs_zeros_4h(<4 x i16> %a, <4 x i16> %b) #0 {
define <16 x i8> @saba_abs_zeros_16b(<16 x i8> %a, <16 x i8> %b) #0 {
; CHECK-LABEL: saba_abs_zeros_16b:
; CHECK: // %bb.0:
-; CHECK-NEXT: abs v1.16b, v1.16b
-; CHECK-NEXT: add v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: movi v2.2d, #0000000000000000
+; CHECK-NEXT: saba v0.16b, v1.16b, v2.16b
; CHECK-NEXT: ret
%abs = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %b, i1 true)
%add = add <16 x i8> %a, %abs
@@ -336,8 +300,8 @@ define <16 x i8> @saba_abs_zeros_16b(<16 x i8> %a, <16 x i8> %b) #0 {
define <8 x i8> @saba_abs_zeros_8b(<8 x i8> %a, <8 x i8> %b) #0 {
; CHECK-LABEL: saba_abs_zeros_8b:
; CHECK: // %bb.0:
-; CHECK-NEXT: abs v1.8b, v1.8b
-; CHECK-NEXT: add v0.8b, v0.8b, v1.8b
+; CHECK-NEXT: movi v2.2d, #0000000000000000
+; CHECK-NEXT: saba v0.8b, v1.8b, v2.8b
; CHECK-NEXT: ret
%abs = call <8 x i8> @llvm.abs.v8i8(<8 x i8> %b, i1 true)
%add = add <8 x i8> %a, %abs
@@ -347,17 +311,11 @@ define <8 x i8> @saba_abs_zeros_8b(<8 x i8> %a, <8 x i8> %b) #0 {
; SABAL from ADD(ZEXT(SABD(X, ZEROS)))
define <2 x i64> @sabal_sabd_zeros_2s(<2 x i64> %a, <2 x i32> %b) #0 {
-; CHECK-SD-LABEL: sabal_sabd_zeros_2s:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: abs v1.2s, v1.2s
-; CHECK-SD-NEXT: uaddw v0.2d, v0.2d, v1.2s
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: sabal_sabd_zeros_2s:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
-; CHECK-GI-NEXT: sabal v0.2d, v1.2s, v2.2s
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: sabal_sabd_zeros_2s:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v2.2d, #0000000000000000
+; CHECK-NEXT: sabal v0.2d, v1.2s, v2.2s
+; CHECK-NEXT: ret
%sabd = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %b, <2 x i32> zeroinitializer)
%sabd.zext = zext <2 x i32> %sabd to <2 x i64>
%add = add <2 x i64> %sabd.zext, %a
@@ -365,17 +323,11 @@ define <2 x i64> @sabal_sabd_zeros_2s(<2 x i64> %a, <2 x i32> %b) #0 {
}
define <4 x i32> @sabal_sabd_zeros_4h(<4 x i32> %a, <4 x i16> %b) #0 {
-; CHECK-SD-LABEL: sabal_sabd_zeros_4h:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: abs v1.4h, v1.4h
-; CHECK-SD-NEXT: uaddw v0.4s, v0.4s, v1.4h
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: sabal_sabd_zeros_4h:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
-; CHECK-GI-NEXT: sabal v0.4s, v1.4h, v2.4h
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: sabal_sabd_zeros_4h:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v2.2d, #0000000000000000
+; CHECK-NEXT: sabal v0.4s, v1.4h, v2.4h
+; CHECK-NEXT: ret
%sabd = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %b, <4 x i16> zeroinitializer)
%sabd.zext = zext <4 x i16> %sabd to <4 x i32>
%add = add <4 x i32> %sabd.zext, %a
@@ -383,17 +335,11 @@ define <4 x i32> @sabal_sabd_zeros_4h(<4 x i32> %a, <4 x i16> %b) #0 {
}
define <8 x i16> @sabal_sabd_zeros_8b(<8 x i16> %a, <8 x i8> %b) #0 {
-; CHECK-SD-LABEL: sabal_sabd_zeros_8b:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: abs v1.8b, v1.8b
-; CHECK-SD-NEXT: uaddw v0.8h, v0.8h, v1.8b
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: sabal_sabd_zeros_8b:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
-; CHECK-GI-NEXT: sabal v0.8h, v1.8b, v2.8b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: sabal_sabd_zeros_8b:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v2.2d, #0000000000000000
+; CHECK-NEXT: sabal v0.8h, v1.8b, v2.8b
+; CHECK-NEXT: ret
%sabd = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %b, <8 x i8> zeroinitializer)
%sabd.zext = zext <8 x i8> %sabd to <8 x i16>
%add = add <8 x i16> %sabd.zext, %a
@@ -403,8 +349,8 @@ define <8 x i16> @sabal_sabd_zeros_8b(<8 x i16> %a, <8 x i8> %b) #0 {
define <2 x i64> @sabal_abs_zeros_2s(<2 x i64> %a, <2 x i32> %b) #0 {
; CHECK-LABEL: sabal_abs_zeros_2s:
; CHECK: // %bb.0:
-; CHECK-NEXT: abs v1.2s, v1.2s
-; CHECK-NEXT: uaddw v0.2d, v0.2d, v1.2s
+; CHECK-NEXT: movi v2.2d, #0000000000000000
+; CHECK-NEXT: sabal v0.2d, v1.2s, v2.2s
; CHECK-NEXT: ret
%abs = call <2 x i32> @llvm.abs.v2i32(<2 x i32> %b, i1 true)
%abs.zext = zext <2 x i32> %abs to <2 x i64>
@@ -415,8 +361,8 @@ define <2 x i64> @sabal_abs_zeros_2s(<2 x i64> %a, <2 x i32> %b) #0 {
define <4 x i32> @sabal_abs_zeros_4h(<4 x i32> %a, <4 x i16> %b) #0 {
; CHECK-LABEL: sabal_abs_zeros_4h:
; CHECK: // %bb.0:
-; CHECK-NEXT: abs v1.4h, v1.4h
-; CHECK-NEXT: uaddw v0.4s, v0.4s, v1.4h
+; CHECK-NEXT: movi v2.2d, #0000000000000000
+; CHECK-NEXT: sabal v0.4s, v1.4h, v2.4h
; CHECK-NEXT: ret
%abs = call <4 x i16> @llvm.abs.v4i16(<4 x i16> %b, i1 true)
%abs.zext = zext <4 x i16> %abs to <4 x i32>
@@ -427,8 +373,8 @@ define <4 x i32> @sabal_abs_zeros_4h(<4 x i32> %a, <4 x i16> %b) #0 {
define <8 x i16> @sabal_abs_zeros_8b(<8 x i16> %a, <8 x i8> %b) #0 {
; CHECK-LABEL: sabal_abs_zeros_8b:
; CHECK: // %bb.0:
-; CHECK-NEXT: abs v1.8b, v1.8b
-; CHECK-NEXT: uaddw v0.8h, v0.8h, v1.8b
+; CHECK-NEXT: movi v2.2d, #0000000000000000
+; CHECK-NEXT: sabal v0.8h, v1.8b, v2.8b
; CHECK-NEXT: ret
%abs = call <8 x i8> @llvm.abs.v8i8(<8 x i8> %b, i1 true)
%abs.zext = zext <8 x i8> %abs to <8 x i16>
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-add.ll b/llvm/test/CodeGen/AArch64/vecreduce-add.ll
index 74d1165d99b82..fb504028a161b 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-add.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-add.ll
@@ -4535,96 +4535,89 @@ define i32 @full(ptr %p1, i32 noundef %s1, ptr %p2, i32 noundef %s2) {
; CHECK-GI-NEXT: ldr d1, [x2]
; CHECK-GI-NEXT: add x10, x0, x9
; CHECK-GI-NEXT: add x11, x2, x8
-; CHECK-GI-NEXT: usubl v0.8h, v0.8b, v1.8b
+; CHECK-GI-NEXT: usubl v2.8h, v0.8b, v1.8b
; CHECK-GI-NEXT: ldr d1, [x10]
-; CHECK-GI-NEXT: ldr d2, [x11]
+; CHECK-GI-NEXT: ldr d3, [x11]
; CHECK-GI-NEXT: add x10, x10, x9
; CHECK-GI-NEXT: add x11, x11, x8
-; CHECK-GI-NEXT: usubl v1.8h, v1.8b, v2.8b
-; CHECK-GI-NEXT: ldr d3, [x10]
-; CHECK-GI-NEXT: ldr d4, [x11]
-; CHECK-GI-NEXT: sshll v5.4s, v0.4h, #0
-; CHECK-GI-NEXT: sshll2 v0.4s, v0.8h, #0
-; CHECK-GI-NEXT: add x10, x10, x9
-; CHECK-GI-NEXT: add x11, x11, x8
-; CHECK-GI-NEXT: ldr d2, [x10]
-; CHECK-GI-NEXT: add x10, x10, x9
-; CHECK-GI-NEXT: sshll v7.4s, v1.4h, #0
-; CHECK-GI-NEXT: sshll2 v1.4s, v1.8h, #0
-; CHECK-GI-NEXT: ldr d6, [x11]
-; CHECK-GI-NEXT: add x11, x11, x8
-; CHECK-GI-NEXT: usubl v3.8h, v3.8b, v4.8b
-; CHECK-GI-NEXT: abs v5.4s, v5.4s
-; CHECK-GI-NEXT: abs v0.4s, v0.4s
+; CHECK-GI-NEXT: movi v0.2d, #0000000000000000
+; CHECK-GI-NEXT: usubl v3.8h, v1.8b, v3.8b
; CHECK-GI-NEXT: ldr d4, [x10]
-; CHECK-GI-NEXT: ldr d16, [x11]
-; CHECK-GI-NEXT: abs v7.4s, v7.4s
-; CHECK-GI-NEXT: abs v1.4s, v1.4s
+; CHECK-GI-NEXT: ldr d5, [x11]
; CHECK-GI-NEXT: add x10, x10, x9
; CHECK-GI-NEXT: add x11, x11, x8
-; CHECK-GI-NEXT: usubl v2.8h, v2.8b, v6.8b
+; CHECK-GI-NEXT: sshll v1.4s, v2.4h, #0
+; CHECK-GI-NEXT: sshll2 v2.4s, v2.8h, #0
; CHECK-GI-NEXT: ldr d6, [x10]
-; CHECK-GI-NEXT: ldr d17, [x11]
+; CHECK-GI-NEXT: ldr d7, [x11]
; CHECK-GI-NEXT: add x10, x10, x9
; CHECK-GI-NEXT: add x11, x11, x8
-; CHECK-GI-NEXT: usubl v4.8h, v4.8b, v16.8b
-; CHECK-GI-NEXT: sshll v16.4s, v3.4h, #0
-; CHECK-GI-NEXT: sshll2 v3.4s, v3.8h, #0
-; CHECK-GI-NEXT: add v0.4s, v5.4s, v0.4s
-; CHECK-GI-NEXT: add v1.4s, v7.4s, v1.4s
+; CHECK-GI-NEXT: sshll2 v16.4s, v3.8h, #0
+; CHECK-GI-NEXT: usubl v4.8h, v4.8b, v5.8b
; CHECK-GI-NEXT: ldr d5, [x10]
-; CHECK-GI-NEXT: ldr d7, [x11]
-; CHECK-GI-NEXT: sshll v18.4s, v2.4h, #0
-; CHECK-GI-NEXT: sshll2 v2.4s, v2.8h, #0
-; CHECK-GI-NEXT: usubl v6.8h, v6.8b, v17.8b
-; CHECK-GI-NEXT: ldr d17, [x11, x8]
-; CHECK-GI-NEXT: sshll v19.4s, v4.4h, #0
-; CHECK-GI-NEXT: usubl v5.8h, v5.8b, v7.8b
-; CHECK-GI-NEXT: ldr d7, [x10, x9]
-; CHECK-GI-NEXT: sshll2 v4.4s, v4.8h, #0
+; CHECK-GI-NEXT: ldr d17, [x11]
+; CHECK-GI-NEXT: add x10, x10, x9
+; CHECK-GI-NEXT: add x11, x11, x8
+; CHECK-GI-NEXT: usubl v6.8h, v6.8b, v7.8b
+; CHECK-GI-NEXT: ldr d7, [x10]
+; CHECK-GI-NEXT: ldr d19, [x11]
+; CHECK-GI-NEXT: abs v2.4s, v2.4s
+; CHECK-GI-NEXT: sshll v3.4s, v3.4h, #0
; CHECK-GI-NEXT: abs v16.4s, v16.4s
-; CHECK-GI-NEXT: abs v3.4s, v3.4s
+; CHECK-GI-NEXT: add x10, x10, x9
+; CHECK-GI-NEXT: add x11, x11, x8
+; CHECK-GI-NEXT: usubl v5.8h, v5.8b, v17.8b
+; CHECK-GI-NEXT: ldr d17, [x10]
+; CHECK-GI-NEXT: ldr d20, [x11]
+; CHECK-GI-NEXT: usubl v7.8h, v7.8b, v19.8b
+; CHECK-GI-NEXT: ldr d19, [x10, x9]
+; CHECK-GI-NEXT: ldr d21, [x11, x8]
+; CHECK-GI-NEXT: sshll2 v18.4s, v4.8h, #0
+; CHECK-GI-NEXT: saba v2.4s, v1.4s, v0.4s
+; CHECK-GI-NEXT: sshll2 v1.4s, v6.8h, #0
+; CHECK-GI-NEXT: usubl v17.8h, v17.8b, v20.8b
+; CHECK-GI-NEXT: saba v16.4s, v3.4s, v0.4s
+; CHECK-GI-NEXT: sshll2 v3.4s, v5.8h, #0
+; CHECK-GI-NEXT: usubl v19.8h, v19.8b, v21.8b
+; CHECK-GI-NEXT: sshll v4.4s, v4.4h, #0
; CHECK-GI-NEXT: abs v18.4s, v18.4s
-; CHECK-GI-NEXT: abs v2.4s, v2.4s
-; CHECK-GI-NEXT: usubl v7.8h, v7.8b, v17.8b
-; CHECK-GI-NEXT: sshll v17.4s, v6.4h, #0
-; CHECK-GI-NEXT: sshll2 v6.4s, v6.8h, #0
-; CHECK-GI-NEXT: abs v19.4s, v19.4s
-; CHECK-GI-NEXT: abs v4.4s, v4.4s
-; CHECK-GI-NEXT: add v3.4s, v16.4s, v3.4s
-; CHECK-GI-NEXT: sshll v16.4s, v5.4h, #0
-; CHECK-GI-NEXT: sshll2 v5.4s, v5.8h, #0
-; CHECK-GI-NEXT: add v2.4s, v18.4s, v2.4s
-; CHECK-GI-NEXT: abs v17.4s, v17.4s
+; CHECK-GI-NEXT: sshll2 v20.4s, v7.8h, #0
+; CHECK-GI-NEXT: sshll v6.4s, v6.4h, #0
+; CHECK-GI-NEXT: abs v1.4s, v1.4s
+; CHECK-GI-NEXT: sshll2 v21.4s, v17.8h, #0
+; CHECK-GI-NEXT: sshll v5.4s, v5.4h, #0
+; CHECK-GI-NEXT: abs v3.4s, v3.4s
+; CHECK-GI-NEXT: sshll2 v22.4s, v19.8h, #0
+; CHECK-GI-NEXT: saba v18.4s, v4.4s, v0.4s
+; CHECK-GI-NEXT: sshll v4.4s, v7.4h, #0
+; CHECK-GI-NEXT: abs v7.4s, v20.4s
+; CHECK-GI-NEXT: saba v1.4s, v6.4s, v0.4s
+; CHECK-GI-NEXT: sshll v6.4s, v17.4h, #0
+; CHECK-GI-NEXT: abs v17.4s, v21.4s
+; CHECK-GI-NEXT: saba v3.4s, v5.4s, v0.4s
+; CHECK-GI-NEXT: sshll v5.4s, v19.4h, #0
+; CHECK-GI-NEXT: abs v19.4s, v22.4s
+; CHECK-GI-NEXT: saba v7.4s, v4.4s, v0.4s
+; CHECK-GI-NEXT: saba v17.4s, v6.4s, v0.4s
+; CHECK-GI-NEXT: saba v19.4s, v5.4s, v0.4s
+; CHECK-GI-NEXT: addv s0, v2.4s
+; CHECK-GI-NEXT: addv s2, v16.4s
+; CHECK-GI-NEXT: addv s4, v18.4s
; CHECK-GI-NEXT: addv s1, v1.4s
-; CHECK-GI-NEXT: abs v6.4s, v6.4s
-; CHECK-GI-NEXT: addv s0, v0.4s
-; CHECK-GI-NEXT: add v4.4s, v19.4s, v4.4s
-; CHECK-GI-NEXT: addv s3, v3.4s
-; CHECK-GI-NEXT: sshll v18.4s, v7.4h, #0
-; CHECK-GI-NEXT: sshll2 v7.4s, v7.8h, #0
-; CHECK-GI-NEXT: abs v16.4s, v16.4s
-; CHECK-GI-NEXT: abs v5.4s, v5.4s
-; CHECK-GI-NEXT: fmov w8, s1
-; CHECK-GI-NEXT: add v6.4s, v17.4s, v6.4s
-; CHECK-GI-NEXT: addv s2, v2.4s
+; CHECK-GI-NEXT: fmov w8, s2
; CHECK-GI-NEXT: fmov w9, s0
-; CHECK-GI-NEXT: addv s4, v4.4s
-; CHECK-GI-NEXT: fmov w10, s3
-; CHECK-GI-NEXT: abs v18.4s, v18.4s
-; CHECK-GI-NEXT: abs v7.4s, v7.4s
-; CHECK-GI-NEXT: add v1.4s, v16.4s, v5.4s
+; CHECK-GI-NEXT: addv s0, v3.4s
+; CHECK-GI-NEXT: fmov w10, s4
+; CHECK-GI-NEXT: addv s2, v7.4s
; CHECK-GI-NEXT: add w8, w8, w9
-; CHECK-GI-NEXT: addv s3, v6.4s
-; CHECK-GI-NEXT: fmov w9, s2
+; CHECK-GI-NEXT: fmov w9, s1
+; CHECK-GI-NEXT: addv s1, v17.4s
; CHECK-GI-NEXT: add w8, w10, w8
-; CHECK-GI-NEXT: fmov w10, s4
-; CHECK-GI-NEXT: add v0.4s, v18.4s, v7.4s
-; CHECK-GI-NEXT: addv s1, v1.4s
; CHECK-GI-NEXT: add w8, w9, w8
-; CHECK-GI-NEXT: fmov w9, s3
-; CHECK-GI-NEXT: add w8, w10, w8
-; CHECK-GI-NEXT: addv s0, v0.4s
+; CHECK-GI-NEXT: fmov w9, s0
+; CHECK-GI-NEXT: addv s0, v19.4s
+; CHECK-GI-NEXT: add w8, w9, w8
+; CHECK-GI-NEXT: fmov w9, s2
; CHECK-GI-NEXT: add w8, w9, w8
; CHECK-GI-NEXT: fmov w9, s1
; CHECK-GI-NEXT: add w8, w9, w8
More information about the llvm-commits mailing list