[llvm] 1da4d5a - [AArch64][SVE] Add hadd and rhadd support
David Green via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 14 01:25:02 PST 2022
Author: David Green
Date: 2022-12-14T09:24:54Z
New Revision: 1da4d5aafad74a3d9a3b893335249c9e5b4a7ab9
URL: https://github.com/llvm/llvm-project/commit/1da4d5aafad74a3d9a3b893335249c9e5b4a7ab9
DIFF: https://github.com/llvm/llvm-project/commit/1da4d5aafad74a3d9a3b893335249c9e5b4a7ab9.diff
LOG: [AArch64][SVE] Add hadd and rhadd support
This adds basic HADD and RHADD support for SVE, by marking the AVGFLOOR
and AVGCEIL opcodes as Custom and converting them to HADD_PRED/RHADD_PRED
AArch64 nodes. Both the existing intrinsics and the _PRED nodes are then
lowered to the _ZPmZ instructions.
Differential Revision: https://reviews.llvm.org/D131875
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/lib/Target/AArch64/AArch64ISelLowering.h
llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
llvm/test/CodeGen/AArch64/sve2-hadd.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index af4b98e75a801..bad7a1bcfda51 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1249,6 +1249,13 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SREM, VT, Expand);
setOperationAction(ISD::SDIVREM, VT, Expand);
setOperationAction(ISD::UDIVREM, VT, Expand);
+
+ if (Subtarget->hasSVE2()) {
+ setOperationAction(ISD::AVGFLOORS, VT, Custom);
+ setOperationAction(ISD::AVGFLOORU, VT, Custom);
+ setOperationAction(ISD::AVGCEILS, VT, Custom);
+ setOperationAction(ISD::AVGCEILU, VT, Custom);
+ }
}
// Illegal unpacked integer vector types.
@@ -2219,9 +2226,13 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(AArch64ISD::TLSDESC_CALLSEQ)
MAKE_CASE(AArch64ISD::ABDS_PRED)
MAKE_CASE(AArch64ISD::ABDU_PRED)
+ MAKE_CASE(AArch64ISD::HADDS_PRED)
+ MAKE_CASE(AArch64ISD::HADDU_PRED)
MAKE_CASE(AArch64ISD::MUL_PRED)
MAKE_CASE(AArch64ISD::MULHS_PRED)
MAKE_CASE(AArch64ISD::MULHU_PRED)
+ MAKE_CASE(AArch64ISD::RHADDS_PRED)
+ MAKE_CASE(AArch64ISD::RHADDU_PRED)
MAKE_CASE(AArch64ISD::SDIV_PRED)
MAKE_CASE(AArch64ISD::SHL_PRED)
MAKE_CASE(AArch64ISD::SMAX_PRED)
@@ -5945,6 +5956,14 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABDS_PRED);
case ISD::ABDU:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABDU_PRED);
+ case ISD::AVGFLOORS:
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::HADDS_PRED);
+ case ISD::AVGFLOORU:
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::HADDU_PRED);
+ case ISD::AVGCEILS:
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::RHADDS_PRED);
+ case ISD::AVGCEILU:
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::RHADDU_PRED);
case ISD::BITREVERSE:
return LowerBitreverse(Op, DAG);
case ISD::BSWAP:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 112e88535aae0..49d43d5bce707 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -102,9 +102,13 @@ enum NodeType : unsigned {
FMINNM_PRED,
FMUL_PRED,
FSUB_PRED,
+ HADDS_PRED,
+ HADDU_PRED,
MUL_PRED,
MULHS_PRED,
MULHU_PRED,
+ RHADDS_PRED,
+ RHADDU_PRED,
SDIV_PRED,
SHL_PRED,
SMAX_PRED,
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 3a529041d7292..3900abd175cbd 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -189,11 +189,15 @@ def AArch64lsl_p : SDNode<"AArch64ISD::SHL_PRED", SDT_AArch64Arith>;
def AArch64lsr_p : SDNode<"AArch64ISD::SRL_PRED", SDT_AArch64Arith>;
def AArch64mul_p : SDNode<"AArch64ISD::MUL_PRED", SDT_AArch64Arith>;
def AArch64sabd_p : SDNode<"AArch64ISD::ABDS_PRED", SDT_AArch64Arith>;
+def AArch64shadd_p : SDNode<"AArch64ISD::HADDS_PRED", SDT_AArch64Arith>;
+def AArch64srhadd_p : SDNode<"AArch64ISD::RHADDS_PRED", SDT_AArch64Arith>;
def AArch64sdiv_p : SDNode<"AArch64ISD::SDIV_PRED", SDT_AArch64Arith>;
def AArch64smax_p : SDNode<"AArch64ISD::SMAX_PRED", SDT_AArch64Arith>;
def AArch64smin_p : SDNode<"AArch64ISD::SMIN_PRED", SDT_AArch64Arith>;
def AArch64smulh_p : SDNode<"AArch64ISD::MULHS_PRED", SDT_AArch64Arith>;
def AArch64uabd_p : SDNode<"AArch64ISD::ABDU_PRED", SDT_AArch64Arith>;
+def AArch64uhadd_p : SDNode<"AArch64ISD::HADDU_PRED", SDT_AArch64Arith>;
+def AArch64urhadd_p : SDNode<"AArch64ISD::RHADDU_PRED", SDT_AArch64Arith>;
def AArch64udiv_p : SDNode<"AArch64ISD::UDIV_PRED", SDT_AArch64Arith>;
def AArch64umax_p : SDNode<"AArch64ISD::UMAX_PRED", SDT_AArch64Arith>;
def AArch64umin_p : SDNode<"AArch64ISD::UMIN_PRED", SDT_AArch64Arith>;
@@ -258,6 +262,19 @@ def AArch64fsub_m1 : PatFrags<(ops node:$pg, node:$op1, node:$op2), [
(AArch64fsub_p (SVEAllActive), node:$op1, (vselect node:$pg, node:$op2, (SVEDup0)))
]>;
+def AArch64shadd : PatFrags<(ops node:$pg, node:$op1, node:$op2),
+ [(int_aarch64_sve_shadd node:$pg, node:$op1, node:$op2),
+ (AArch64shadd_p node:$pg, node:$op1, node:$op2)]>;
+def AArch64uhadd : PatFrags<(ops node:$pg, node:$op1, node:$op2),
+ [(int_aarch64_sve_uhadd node:$pg, node:$op1, node:$op2),
+ (AArch64uhadd_p node:$pg, node:$op1, node:$op2)]>;
+def AArch64srhadd : PatFrags<(ops node:$pg, node:$op1, node:$op2),
+ [(int_aarch64_sve_srhadd node:$pg, node:$op1, node:$op2),
+ (AArch64srhadd_p node:$pg, node:$op1, node:$op2)]>;
+def AArch64urhadd : PatFrags<(ops node:$pg, node:$op1, node:$op2),
+ [(int_aarch64_sve_urhadd node:$pg, node:$op1, node:$op2),
+ (AArch64urhadd_p node:$pg, node:$op1, node:$op2)]>;
+
def AArch64saba : PatFrags<(ops node:$op1, node:$op2, node:$op3),
[(int_aarch64_sve_saba node:$op1, node:$op2, node:$op3),
(add node:$op1, (AArch64sabd_p (SVEAllActive), node:$op2, node:$op3))]>;
@@ -3288,12 +3305,12 @@ let Predicates = [HasSVE2orSME] in {
defm SQDMLSLBT_ZZZ : sve2_int_mla_long<0b00011, "sqdmlslbt", int_aarch64_sve_sqdmlslbt>;
// SVE2 integer halving add/subtract (predicated)
- defm SHADD_ZPmZ : sve2_int_arith_pred<0b100000, "shadd", int_aarch64_sve_shadd>;
- defm UHADD_ZPmZ : sve2_int_arith_pred<0b100010, "uhadd", int_aarch64_sve_uhadd>;
+ defm SHADD_ZPmZ : sve2_int_arith_pred<0b100000, "shadd", AArch64shadd>;
+ defm UHADD_ZPmZ : sve2_int_arith_pred<0b100010, "uhadd", AArch64uhadd>;
defm SHSUB_ZPmZ : sve2_int_arith_pred<0b100100, "shsub", int_aarch64_sve_shsub>;
defm UHSUB_ZPmZ : sve2_int_arith_pred<0b100110, "uhsub", int_aarch64_sve_uhsub>;
- defm SRHADD_ZPmZ : sve2_int_arith_pred<0b101000, "srhadd", int_aarch64_sve_srhadd>;
- defm URHADD_ZPmZ : sve2_int_arith_pred<0b101010, "urhadd", int_aarch64_sve_urhadd>;
+ defm SRHADD_ZPmZ : sve2_int_arith_pred<0b101000, "srhadd", AArch64srhadd>;
+ defm URHADD_ZPmZ : sve2_int_arith_pred<0b101010, "urhadd", AArch64urhadd>;
defm SHSUBR_ZPmZ : sve2_int_arith_pred<0b101100, "shsubr", int_aarch64_sve_shsubr>;
defm UHSUBR_ZPmZ : sve2_int_arith_pred<0b101110, "uhsubr", int_aarch64_sve_uhsubr>;
diff --git a/llvm/test/CodeGen/AArch64/sve2-hadd.ll b/llvm/test/CodeGen/AArch64/sve2-hadd.ll
index 2d494c43d4ce7..6ad2ad0feacf6 100644
--- a/llvm/test/CodeGen/AArch64/sve2-hadd.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-hadd.ll
@@ -1,6 +1,36 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple aarch64-none-eabi -mattr=+sve2 -o - | FileCheck %s
+define <vscale x 2 x i64> @hadds_v2i64(<vscale x 2 x i64> %s0, <vscale x 2 x i64> %s1) {
+; CHECK-LABEL: hadds_v2i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: shadd z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+entry:
+ %s0s = sext <vscale x 2 x i64> %s0 to <vscale x 2 x i128>
+ %s1s = sext <vscale x 2 x i64> %s1 to <vscale x 2 x i128>
+ %m = add <vscale x 2 x i128> %s0s, %s1s
+ %s = lshr <vscale x 2 x i128> %m, shufflevector (<vscale x 2 x i128> insertelement (<vscale x 2 x i128> poison, i128 1, i32 0), <vscale x 2 x i128> poison, <vscale x 2 x i32> zeroinitializer)
+ %s2 = trunc <vscale x 2 x i128> %s to <vscale x 2 x i64>
+ ret <vscale x 2 x i64> %s2
+}
+
+define <vscale x 2 x i64> @haddu_v2i64(<vscale x 2 x i64> %s0, <vscale x 2 x i64> %s1) {
+; CHECK-LABEL: haddu_v2i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: uhadd z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+entry:
+ %s0s = zext <vscale x 2 x i64> %s0 to <vscale x 2 x i128>
+ %s1s = zext <vscale x 2 x i64> %s1 to <vscale x 2 x i128>
+ %m = add <vscale x 2 x i128> %s0s, %s1s
+ %s = lshr <vscale x 2 x i128> %m, shufflevector (<vscale x 2 x i128> insertelement (<vscale x 2 x i128> poison, i128 1, i32 0), <vscale x 2 x i128> poison, <vscale x 2 x i32> zeroinitializer)
+ %s2 = trunc <vscale x 2 x i128> %s to <vscale x 2 x i64>
+ ret <vscale x 2 x i64> %s2
+}
+
define <vscale x 2 x i32> @hadds_v2i32(<vscale x 2 x i32> %s0, <vscale x 2 x i32> %s1) {
; CHECK-LABEL: hadds_v2i32:
; CHECK: // %bb.0: // %entry
@@ -37,15 +67,8 @@ entry:
define <vscale x 4 x i32> @hadds_v4i32(<vscale x 4 x i32> %s0, <vscale x 4 x i32> %s1) {
; CHECK-LABEL: hadds_v4i32:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: sunpkhi z2.d, z0.s
-; CHECK-NEXT: sunpklo z0.d, z0.s
-; CHECK-NEXT: sunpkhi z3.d, z1.s
-; CHECK-NEXT: sunpklo z1.d, z1.s
-; CHECK-NEXT: add z0.d, z0.d, z1.d
-; CHECK-NEXT: add z1.d, z2.d, z3.d
-; CHECK-NEXT: lsr z1.d, z1.d, #1
-; CHECK-NEXT: lsr z0.d, z0.d, #1
-; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: shadd z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
entry:
%s0s = sext <vscale x 4 x i32> %s0 to <vscale x 4 x i64>
@@ -59,15 +82,8 @@ entry:
define <vscale x 4 x i32> @haddu_v4i32(<vscale x 4 x i32> %s0, <vscale x 4 x i32> %s1) {
; CHECK-LABEL: haddu_v4i32:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: uunpkhi z2.d, z0.s
-; CHECK-NEXT: uunpklo z0.d, z0.s
-; CHECK-NEXT: uunpkhi z3.d, z1.s
-; CHECK-NEXT: uunpklo z1.d, z1.s
-; CHECK-NEXT: add z0.d, z0.d, z1.d
-; CHECK-NEXT: add z1.d, z2.d, z3.d
-; CHECK-NEXT: lsr z1.d, z1.d, #1
-; CHECK-NEXT: lsr z0.d, z0.d, #1
-; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: uhadd z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
entry:
%s0s = zext <vscale x 4 x i32> %s0 to <vscale x 4 x i64>
@@ -152,15 +168,8 @@ entry:
define <vscale x 8 x i16> @hadds_v8i16(<vscale x 8 x i16> %s0, <vscale x 8 x i16> %s1) {
; CHECK-LABEL: hadds_v8i16:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: sunpkhi z2.s, z0.h
-; CHECK-NEXT: sunpklo z0.s, z0.h
-; CHECK-NEXT: sunpkhi z3.s, z1.h
-; CHECK-NEXT: sunpklo z1.s, z1.h
-; CHECK-NEXT: add z0.s, z0.s, z1.s
-; CHECK-NEXT: add z1.s, z2.s, z3.s
-; CHECK-NEXT: lsr z1.s, z1.s, #1
-; CHECK-NEXT: lsr z0.s, z0.s, #1
-; CHECK-NEXT: uzp1 z0.h, z0.h, z1.h
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: shadd z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: ret
entry:
%s0s = sext <vscale x 8 x i16> %s0 to <vscale x 8 x i32>
@@ -174,15 +183,8 @@ entry:
define <vscale x 8 x i16> @haddu_v8i16(<vscale x 8 x i16> %s0, <vscale x 8 x i16> %s1) {
; CHECK-LABEL: haddu_v8i16:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: uunpkhi z2.s, z0.h
-; CHECK-NEXT: uunpklo z0.s, z0.h
-; CHECK-NEXT: uunpkhi z3.s, z1.h
-; CHECK-NEXT: uunpklo z1.s, z1.h
-; CHECK-NEXT: add z0.s, z0.s, z1.s
-; CHECK-NEXT: add z1.s, z2.s, z3.s
-; CHECK-NEXT: lsr z1.s, z1.s, #1
-; CHECK-NEXT: lsr z0.s, z0.s, #1
-; CHECK-NEXT: uzp1 z0.h, z0.h, z1.h
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: uhadd z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: ret
entry:
%s0s = zext <vscale x 8 x i16> %s0 to <vscale x 8 x i32>
@@ -267,15 +269,8 @@ entry:
define <vscale x 16 x i8> @hadds_v16i8(<vscale x 16 x i8> %s0, <vscale x 16 x i8> %s1) {
; CHECK-LABEL: hadds_v16i8:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: sunpkhi z2.h, z0.b
-; CHECK-NEXT: sunpklo z0.h, z0.b
-; CHECK-NEXT: sunpkhi z3.h, z1.b
-; CHECK-NEXT: sunpklo z1.h, z1.b
-; CHECK-NEXT: add z0.h, z0.h, z1.h
-; CHECK-NEXT: add z1.h, z2.h, z3.h
-; CHECK-NEXT: lsr z1.h, z1.h, #1
-; CHECK-NEXT: lsr z0.h, z0.h, #1
-; CHECK-NEXT: uzp1 z0.b, z0.b, z1.b
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: shadd z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: ret
entry:
%s0s = sext <vscale x 16 x i8> %s0 to <vscale x 16 x i16>
@@ -289,15 +284,8 @@ entry:
define <vscale x 16 x i8> @haddu_v16i8(<vscale x 16 x i8> %s0, <vscale x 16 x i8> %s1) {
; CHECK-LABEL: haddu_v16i8:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: uunpkhi z2.h, z0.b
-; CHECK-NEXT: uunpklo z0.h, z0.b
-; CHECK-NEXT: uunpkhi z3.h, z1.b
-; CHECK-NEXT: uunpklo z1.h, z1.b
-; CHECK-NEXT: add z0.h, z0.h, z1.h
-; CHECK-NEXT: add z1.h, z2.h, z3.h
-; CHECK-NEXT: lsr z1.h, z1.h, #1
-; CHECK-NEXT: lsr z0.h, z0.h, #1
-; CHECK-NEXT: uzp1 z0.b, z0.b, z1.b
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: uhadd z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: ret
entry:
%s0s = zext <vscale x 16 x i8> %s0 to <vscale x 16 x i16>
@@ -308,6 +296,38 @@ entry:
ret <vscale x 16 x i8> %s2
}
+define <vscale x 2 x i64> @rhadds_v2i64(<vscale x 2 x i64> %s0, <vscale x 2 x i64> %s1) {
+; CHECK-LABEL: rhadds_v2i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: srhadd z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+entry:
+ %s0s = sext <vscale x 2 x i64> %s0 to <vscale x 2 x i128>
+ %s1s = sext <vscale x 2 x i64> %s1 to <vscale x 2 x i128>
+ %add = add <vscale x 2 x i128> %s0s, %s1s
+ %add2 = add <vscale x 2 x i128> %add, shufflevector (<vscale x 2 x i128> insertelement (<vscale x 2 x i128> poison, i128 1, i32 0), <vscale x 2 x i128> poison, <vscale x 2 x i32> zeroinitializer)
+ %s = lshr <vscale x 2 x i128> %add2, shufflevector (<vscale x 2 x i128> insertelement (<vscale x 2 x i128> poison, i128 1, i32 0), <vscale x 2 x i128> poison, <vscale x 2 x i32> zeroinitializer)
+ %result = trunc <vscale x 2 x i128> %s to <vscale x 2 x i64>
+ ret <vscale x 2 x i64> %result
+}
+
+define <vscale x 2 x i64> @rhaddu_v2i64(<vscale x 2 x i64> %s0, <vscale x 2 x i64> %s1) {
+; CHECK-LABEL: rhaddu_v2i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: urhadd z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+entry:
+ %s0s = zext <vscale x 2 x i64> %s0 to <vscale x 2 x i128>
+ %s1s = zext <vscale x 2 x i64> %s1 to <vscale x 2 x i128>
+ %add = add <vscale x 2 x i128> %s0s, %s1s
+ %add2 = add <vscale x 2 x i128> %add, shufflevector (<vscale x 2 x i128> insertelement (<vscale x 2 x i128> poison, i128 1, i32 0), <vscale x 2 x i128> poison, <vscale x 2 x i32> zeroinitializer)
+ %s = lshr <vscale x 2 x i128> %add2, shufflevector (<vscale x 2 x i128> insertelement (<vscale x 2 x i128> poison, i128 1, i32 0), <vscale x 2 x i128> poison, <vscale x 2 x i32> zeroinitializer)
+ %result = trunc <vscale x 2 x i128> %s to <vscale x 2 x i64>
+ ret <vscale x 2 x i64> %result
+}
+
define <vscale x 2 x i32> @rhadds_v2i32(<vscale x 2 x i32> %s0, <vscale x 2 x i32> %s1) {
; CHECK-LABEL: rhadds_v2i32:
; CHECK: // %bb.0: // %entry
@@ -352,18 +372,8 @@ entry:
define <vscale x 4 x i32> @rhadds_v4i32(<vscale x 4 x i32> %s0, <vscale x 4 x i32> %s1) {
; CHECK-LABEL: rhadds_v4i32:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov z2.d, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: sunpkhi z3.d, z0.s
-; CHECK-NEXT: sunpklo z0.d, z0.s
-; CHECK-NEXT: sunpkhi z4.d, z1.s
-; CHECK-NEXT: sunpklo z1.d, z1.s
-; CHECK-NEXT: eor z0.d, z0.d, z2.d
-; CHECK-NEXT: eor z2.d, z3.d, z2.d
-; CHECK-NEXT: sub z0.d, z1.d, z0.d
-; CHECK-NEXT: sub z1.d, z4.d, z2.d
-; CHECK-NEXT: lsr z0.d, z0.d, #1
-; CHECK-NEXT: lsr z1.d, z1.d, #1
-; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: srhadd z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
entry:
%s0s = sext <vscale x 4 x i32> %s0 to <vscale x 4 x i64>
@@ -378,18 +388,8 @@ entry:
define <vscale x 4 x i32> @rhaddu_v4i32(<vscale x 4 x i32> %s0, <vscale x 4 x i32> %s1) {
; CHECK-LABEL: rhaddu_v4i32:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov z2.d, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: uunpkhi z3.d, z0.s
-; CHECK-NEXT: uunpklo z0.d, z0.s
-; CHECK-NEXT: uunpkhi z4.d, z1.s
-; CHECK-NEXT: uunpklo z1.d, z1.s
-; CHECK-NEXT: eor z0.d, z0.d, z2.d
-; CHECK-NEXT: eor z2.d, z3.d, z2.d
-; CHECK-NEXT: sub z0.d, z1.d, z0.d
-; CHECK-NEXT: sub z1.d, z4.d, z2.d
-; CHECK-NEXT: lsr z0.d, z0.d, #1
-; CHECK-NEXT: lsr z1.d, z1.d, #1
-; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: urhadd z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
entry:
%s0s = zext <vscale x 4 x i32> %s0 to <vscale x 4 x i64>
@@ -487,18 +487,8 @@ entry:
define <vscale x 8 x i16> @rhadds_v8i16(<vscale x 8 x i16> %s0, <vscale x 8 x i16> %s1) {
; CHECK-LABEL: rhadds_v8i16:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov z2.s, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: sunpkhi z3.s, z0.h
-; CHECK-NEXT: sunpklo z0.s, z0.h
-; CHECK-NEXT: sunpkhi z4.s, z1.h
-; CHECK-NEXT: sunpklo z1.s, z1.h
-; CHECK-NEXT: eor z0.d, z0.d, z2.d
-; CHECK-NEXT: eor z2.d, z3.d, z2.d
-; CHECK-NEXT: sub z0.s, z1.s, z0.s
-; CHECK-NEXT: sub z1.s, z4.s, z2.s
-; CHECK-NEXT: lsr z0.s, z0.s, #1
-; CHECK-NEXT: lsr z1.s, z1.s, #1
-; CHECK-NEXT: uzp1 z0.h, z0.h, z1.h
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: srhadd z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: ret
entry:
%s0s = sext <vscale x 8 x i16> %s0 to <vscale x 8 x i32>
@@ -513,18 +503,8 @@ entry:
define <vscale x 8 x i16> @rhaddu_v8i16(<vscale x 8 x i16> %s0, <vscale x 8 x i16> %s1) {
; CHECK-LABEL: rhaddu_v8i16:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov z2.s, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: uunpkhi z3.s, z0.h
-; CHECK-NEXT: uunpklo z0.s, z0.h
-; CHECK-NEXT: uunpkhi z4.s, z1.h
-; CHECK-NEXT: uunpklo z1.s, z1.h
-; CHECK-NEXT: eor z0.d, z0.d, z2.d
-; CHECK-NEXT: eor z2.d, z3.d, z2.d
-; CHECK-NEXT: sub z0.s, z1.s, z0.s
-; CHECK-NEXT: sub z1.s, z4.s, z2.s
-; CHECK-NEXT: lsr z0.s, z0.s, #1
-; CHECK-NEXT: lsr z1.s, z1.s, #1
-; CHECK-NEXT: uzp1 z0.h, z0.h, z1.h
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: urhadd z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: ret
entry:
%s0s = zext <vscale x 8 x i16> %s0 to <vscale x 8 x i32>
@@ -622,18 +602,8 @@ entry:
define <vscale x 16 x i8> @rhadds_v16i8(<vscale x 16 x i8> %s0, <vscale x 16 x i8> %s1) {
; CHECK-LABEL: rhadds_v16i8:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov z2.h, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: sunpkhi z3.h, z0.b
-; CHECK-NEXT: sunpklo z0.h, z0.b
-; CHECK-NEXT: sunpkhi z4.h, z1.b
-; CHECK-NEXT: sunpklo z1.h, z1.b
-; CHECK-NEXT: eor z0.d, z0.d, z2.d
-; CHECK-NEXT: eor z2.d, z3.d, z2.d
-; CHECK-NEXT: sub z0.h, z1.h, z0.h
-; CHECK-NEXT: sub z1.h, z4.h, z2.h
-; CHECK-NEXT: lsr z0.h, z0.h, #1
-; CHECK-NEXT: lsr z1.h, z1.h, #1
-; CHECK-NEXT: uzp1 z0.b, z0.b, z1.b
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: srhadd z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: ret
entry:
%s0s = sext <vscale x 16 x i8> %s0 to <vscale x 16 x i16>
@@ -648,18 +618,8 @@ entry:
define <vscale x 16 x i8> @rhaddu_v16i8(<vscale x 16 x i8> %s0, <vscale x 16 x i8> %s1) {
; CHECK-LABEL: rhaddu_v16i8:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov z2.h, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: uunpkhi z3.h, z0.b
-; CHECK-NEXT: uunpklo z0.h, z0.b
-; CHECK-NEXT: uunpkhi z4.h, z1.b
-; CHECK-NEXT: uunpklo z1.h, z1.b
-; CHECK-NEXT: eor z0.d, z0.d, z2.d
-; CHECK-NEXT: eor z2.d, z3.d, z2.d
-; CHECK-NEXT: sub z0.h, z1.h, z0.h
-; CHECK-NEXT: sub z1.h, z4.h, z2.h
-; CHECK-NEXT: lsr z0.h, z0.h, #1
-; CHECK-NEXT: lsr z1.h, z1.h, #1
-; CHECK-NEXT: uzp1 z0.b, z0.b, z1.b
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: urhadd z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: ret
entry:
%s0s = zext <vscale x 16 x i8> %s0 to <vscale x 16 x i16>
More information about the llvm-commits
mailing list