[llvm] 40a51e1 - [AArch64][SVE]: custom lower AVGFloor/AVGCeil.
Hassnaa Hamdi via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 13 12:01:07 PDT 2023
Author: Hassnaa Hamdi
Date: 2023-03-13T19:00:57Z
New Revision: 40a51e1afce989c2adc503cbca2a6cce4a587d84
URL: https://github.com/llvm/llvm-project/commit/40a51e1afce989c2adc503cbca2a6cce4a587d84
DIFF: https://github.com/llvm/llvm-project/commit/40a51e1afce989c2adc503cbca2a6cce4a587d84.diff
LOG: [AArch64][SVE]: custom lower AVGFloor/AVGCeil.
-Lower AVGFloor(A, B) to:
SRL(A) + SRL(B) + (A&B)&1.
-Lower AVGCeil(A, B) to:
SRL(A) + SRL(B) + (A|B)&1.
Differential Revision: https://reviews.llvm.org/D143283
Added:
llvm/test/CodeGen/AArch64/sve-hadd.ll
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/lib/Target/AArch64/AArch64ISelLowering.h
Removed:
llvm/test/CodeGen/AArch64/sve2-hadd.ll
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 4830f12c12c57..f4f16a2e0fb10 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1302,12 +1302,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SDIVREM, VT, Expand);
setOperationAction(ISD::UDIVREM, VT, Expand);
- if (Subtarget->hasSVE2()) {
- setOperationAction(ISD::AVGFLOORS, VT, Custom);
- setOperationAction(ISD::AVGFLOORU, VT, Custom);
- setOperationAction(ISD::AVGCEILS, VT, Custom);
- setOperationAction(ISD::AVGCEILU, VT, Custom);
- }
+ setOperationAction(ISD::AVGFLOORS, VT, Custom);
+ setOperationAction(ISD::AVGFLOORU, VT, Custom);
+ setOperationAction(ISD::AVGCEILS, VT, Custom);
+ setOperationAction(ISD::AVGCEILU, VT, Custom);
}
// Illegal unpacked integer vector types.
@@ -5977,13 +5975,13 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
case ISD::ABDU:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABDU_PRED);
case ISD::AVGFLOORS:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::HADDS_PRED);
+ return LowerAVG(Op, DAG, AArch64ISD::HADDS_PRED);
case ISD::AVGFLOORU:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::HADDU_PRED);
+ return LowerAVG(Op, DAG, AArch64ISD::HADDU_PRED);
case ISD::AVGCEILS:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::RHADDS_PRED);
+ return LowerAVG(Op, DAG, AArch64ISD::RHADDS_PRED);
case ISD::AVGCEILU:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::RHADDU_PRED);
+ return LowerAVG(Op, DAG, AArch64ISD::RHADDU_PRED);
case ISD::BITREVERSE:
return LowerBitreverse(Op, DAG);
case ISD::BSWAP:
@@ -13244,6 +13242,57 @@ SDValue AArch64TargetLowering::LowerWindowsDYNAMIC_STACKALLOC(
return Chain;
}
+// Lower AVGFLOOR{S,U}/AVGCEIL{S,U} (">>" is SRA if signed, else SRL). With
+// SVE2, defer to NewOp. Otherwise, if x and y are extended so x + y cannot wrap:
+// avgfloor(x, y) -> (x + y) >> 1
+// avgceil(x, y) -> (x + y + 1) >> 1
+// In the general case, where the addition might wrap, lower to:
+// avgfloor(x, y) -> (x >> 1) + (y >> 1) + (x & y & 1)
+// avgceil(x, y) -> (x >> 1) + (y >> 1) + ((x | y) & 1)
+SDValue AArch64TargetLowering::LowerAVG(SDValue Op, SelectionDAG &DAG,
+ unsigned NewOp) const {
+ if (Subtarget->hasSVE2()) // SVE2: emit the predicated node NewOp instead.
+ return LowerToPredicatedOp(Op, DAG, NewOp);
+ // Without SVE2, expand the average into generic ADD/AND/OR/shift nodes.
+ SDLoc dl(Op);
+ SDValue OpA = Op->getOperand(0);
+ SDValue OpB = Op->getOperand(1);
+ EVT VT = Op.getValueType();
+ bool IsCeil =
+ (Op->getOpcode() == ISD::AVGCEILS || Op->getOpcode() == ISD::AVGCEILU);
+ bool IsSigned =
+ (Op->getOpcode() == ISD::AVGFLOORS || Op->getOpcode() == ISD::AVGCEILS);
+ unsigned ShiftOpc = IsSigned ? ISD::SRA : ISD::SRL; // Signed keeps the sign.
+
+ assert(VT.isScalableVector() && "Only expect to lower scalable vector op!");
+ // True when the sign bit of Node is known to be zero.
+ auto IsZeroExtended = [&DAG](SDValue &Node) {
+ KnownBits Known = DAG.computeKnownBits(Node, 0);
+ return Known.Zero.isSignBitSet();
+ };
+ // True when Node is known to have more than one sign bit.
+ auto IsSignExtended = [&DAG](SDValue &Node) {
+ return (DAG.ComputeNumSignBits(Node, 0) > 1);
+ };
+ // If neither operand can overflow the addition, add first and shift once.
+ SDValue ConstantOne = DAG.getConstant(1, dl, VT);
+ if ((!IsSigned && IsZeroExtended(OpA) && IsZeroExtended(OpB)) ||
+ (IsSigned && IsSignExtended(OpA) && IsSignExtended(OpB))) {
+ SDValue Add = DAG.getNode(ISD::ADD, dl, VT, OpA, OpB);
+ if (IsCeil)
+ Add = DAG.getNode(ISD::ADD, dl, VT, Add, ConstantOne); // Round up.
+ return DAG.getNode(ShiftOpc, dl, VT, Add, ConstantOne);
+ }
+ // General form: halve each operand first so the sum cannot overflow.
+ SDValue ShiftOpA = DAG.getNode(ShiftOpc, dl, VT, OpA, ConstantOne);
+ SDValue ShiftOpB = DAG.getNode(ShiftOpc, dl, VT, OpB, ConstantOne);
+ // Add back what the shifts dropped: bit 0 of (x & y), or of (x | y) for ceil.
+ SDValue tmp = DAG.getNode(IsCeil ? ISD::OR : ISD::AND, dl, VT, OpA, OpB);
+ tmp = DAG.getNode(ISD::AND, dl, VT, tmp, ConstantOne); // Keep only bit 0.
+ SDValue Add = DAG.getNode(ISD::ADD, dl, VT, ShiftOpA, ShiftOpB);
+ return DAG.getNode(ISD::ADD, dl, VT, Add, tmp);
+}
+
SDValue
AArch64TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SelectionDAG &DAG) const {
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index de87d0c0c2f57..c1b212776fbb7 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -1090,6 +1090,7 @@ class AArch64TargetLowering : public TargetLowering {
SDValue LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, SDValue Chain,
SDValue &Size,
SelectionDAG &DAG) const;
+ SDValue LowerAVG(SDValue Op, SelectionDAG &DAG, unsigned NewOp) const;
SDValue LowerFixedLengthVectorIntDivideToSVE(SDValue Op,
SelectionDAG &DAG) const;
diff --git a/llvm/test/CodeGen/AArch64/sve-hadd.ll b/llvm/test/CodeGen/AArch64/sve-hadd.ll
new file mode 100644
index 0000000000000..82f9e51d9ca2e
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-hadd.ll
@@ -0,0 +1,1295 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+sve | FileCheck %s -check-prefixes=CHECK,SVE
+; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+sve2 | FileCheck %s -check-prefixes=CHECK,SVE2
+
+define <vscale x 2 x i64> @hadds_v2i64(<vscale x 2 x i64> %s0, <vscale x 2 x i64> %s1) {
+; SVE-LABEL: hadds_v2i64:
+; SVE: // %bb.0: // %entry
+; SVE-NEXT: asr z2.d, z1.d, #1
+; SVE-NEXT: asr z3.d, z0.d, #1
+; SVE-NEXT: and z0.d, z0.d, z1.d
+; SVE-NEXT: add z1.d, z3.d, z2.d
+; SVE-NEXT: and z0.d, z0.d, #0x1
+; SVE-NEXT: add z0.d, z1.d, z0.d
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: hadds_v2i64:
+; SVE2: // %bb.0: // %entry
+; SVE2-NEXT: ptrue p0.d
+; SVE2-NEXT: shadd z0.d, p0/m, z0.d, z1.d
+; SVE2-NEXT: ret
+entry:
+ %s0s = sext <vscale x 2 x i64> %s0 to <vscale x 2 x i128>
+ %s1s = sext <vscale x 2 x i64> %s1 to <vscale x 2 x i128>
+ %m = add nsw <vscale x 2 x i128> %s0s, %s1s
+ %s = ashr <vscale x 2 x i128> %m, shufflevector (<vscale x 2 x i128> insertelement (<vscale x 2 x i128> poison, i128 1, i32 0), <vscale x 2 x i128> poison, <vscale x 2 x i32> zeroinitializer)
+ %s2 = trunc <vscale x 2 x i128> %s to <vscale x 2 x i64>
+ ret <vscale x 2 x i64> %s2
+}
+
+define <vscale x 2 x i64> @hadds_v2i64_lsh(<vscale x 2 x i64> %s0, <vscale x 2 x i64> %s1) {
+; SVE-LABEL: hadds_v2i64_lsh:
+; SVE: // %bb.0: // %entry
+; SVE-NEXT: asr z2.d, z1.d, #1
+; SVE-NEXT: asr z3.d, z0.d, #1
+; SVE-NEXT: and z0.d, z0.d, z1.d
+; SVE-NEXT: add z1.d, z3.d, z2.d
+; SVE-NEXT: and z0.d, z0.d, #0x1
+; SVE-NEXT: add z0.d, z1.d, z0.d
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: hadds_v2i64_lsh:
+; SVE2: // %bb.0: // %entry
+; SVE2-NEXT: ptrue p0.d
+; SVE2-NEXT: shadd z0.d, p0/m, z0.d, z1.d
+; SVE2-NEXT: ret
+entry:
+ %s0s = sext <vscale x 2 x i64> %s0 to <vscale x 2 x i128>
+ %s1s = sext <vscale x 2 x i64> %s1 to <vscale x 2 x i128>
+ %m = add nsw <vscale x 2 x i128> %s0s, %s1s
+ %s = lshr <vscale x 2 x i128> %m, shufflevector (<vscale x 2 x i128> insertelement (<vscale x 2 x i128> poison, i128 1, i32 0), <vscale x 2 x i128> poison, <vscale x 2 x i32> zeroinitializer)
+ %s2 = trunc <vscale x 2 x i128> %s to <vscale x 2 x i64>
+ ret <vscale x 2 x i64> %s2
+}
+
+define <vscale x 2 x i64> @haddu_v2i64(<vscale x 2 x i64> %s0, <vscale x 2 x i64> %s1) {
+; SVE-LABEL: haddu_v2i64:
+; SVE: // %bb.0: // %entry
+; SVE-NEXT: lsr z2.d, z1.d, #1
+; SVE-NEXT: lsr z3.d, z0.d, #1
+; SVE-NEXT: and z0.d, z0.d, z1.d
+; SVE-NEXT: add z1.d, z3.d, z2.d
+; SVE-NEXT: and z0.d, z0.d, #0x1
+; SVE-NEXT: add z0.d, z1.d, z0.d
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: haddu_v2i64:
+; SVE2: // %bb.0: // %entry
+; SVE2-NEXT: ptrue p0.d
+; SVE2-NEXT: uhadd z0.d, p0/m, z0.d, z1.d
+; SVE2-NEXT: ret
+entry:
+ %s0s = zext <vscale x 2 x i64> %s0 to <vscale x 2 x i128>
+ %s1s = zext <vscale x 2 x i64> %s1 to <vscale x 2 x i128>
+ %m = add nuw nsw <vscale x 2 x i128> %s0s, %s1s
+ %s = lshr <vscale x 2 x i128> %m, shufflevector (<vscale x 2 x i128> insertelement (<vscale x 2 x i128> poison, i128 1, i32 0), <vscale x 2 x i128> poison, <vscale x 2 x i32> zeroinitializer)
+ %s2 = trunc <vscale x 2 x i128> %s to <vscale x 2 x i64>
+ ret <vscale x 2 x i64> %s2
+}
+
+define <vscale x 2 x i32> @hadds_v2i32(<vscale x 2 x i32> %s0, <vscale x 2 x i32> %s1) {
+; SVE-LABEL: hadds_v2i32:
+; SVE: // %bb.0: // %entry
+; SVE-NEXT: ptrue p0.d
+; SVE-NEXT: sxtw z0.d, p0/m, z0.d
+; SVE-NEXT: adr z0.d, [z0.d, z1.d, sxtw]
+; SVE-NEXT: asr z0.d, z0.d, #1
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: hadds_v2i32:
+; SVE2: // %bb.0: // %entry
+; SVE2-NEXT: ptrue p0.d
+; SVE2-NEXT: sxtw z0.d, p0/m, z0.d
+; SVE2-NEXT: sxtw z1.d, p0/m, z1.d
+; SVE2-NEXT: shadd z0.d, p0/m, z0.d, z1.d
+; SVE2-NEXT: ret
+entry:
+ %s0s = sext <vscale x 2 x i32> %s0 to <vscale x 2 x i64>
+ %s1s = sext <vscale x 2 x i32> %s1 to <vscale x 2 x i64>
+ %m = add nsw <vscale x 2 x i64> %s0s, %s1s
+ %s = ashr <vscale x 2 x i64> %m, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+ %s2 = trunc <vscale x 2 x i64> %s to <vscale x 2 x i32>
+ ret <vscale x 2 x i32> %s2
+}
+
+define <vscale x 2 x i32> @hadds_v2i32_lsh(<vscale x 2 x i32> %s0, <vscale x 2 x i32> %s1) {
+; CHECK-LABEL: hadds_v2i32_lsh:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: sxtw z0.d, p0/m, z0.d
+; CHECK-NEXT: adr z0.d, [z0.d, z1.d, sxtw]
+; CHECK-NEXT: lsr z0.d, z0.d, #1
+; CHECK-NEXT: ret
+entry:
+ %s0s = sext <vscale x 2 x i32> %s0 to <vscale x 2 x i64>
+ %s1s = sext <vscale x 2 x i32> %s1 to <vscale x 2 x i64>
+ %m = add nsw <vscale x 2 x i64> %s0s, %s1s
+ %s = lshr <vscale x 2 x i64> %m, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+ %s2 = trunc <vscale x 2 x i64> %s to <vscale x 2 x i32>
+ ret <vscale x 2 x i32> %s2
+}
+
+define <vscale x 2 x i32> @haddu_v2i32(<vscale x 2 x i32> %s0, <vscale x 2 x i32> %s1) {
+; SVE-LABEL: haddu_v2i32:
+; SVE: // %bb.0: // %entry
+; SVE-NEXT: and z0.d, z0.d, #0xffffffff
+; SVE-NEXT: adr z0.d, [z0.d, z1.d, uxtw]
+; SVE-NEXT: lsr z0.d, z0.d, #1
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: haddu_v2i32:
+; SVE2: // %bb.0: // %entry
+; SVE2-NEXT: ptrue p0.d
+; SVE2-NEXT: and z0.d, z0.d, #0xffffffff
+; SVE2-NEXT: and z1.d, z1.d, #0xffffffff
+; SVE2-NEXT: uhadd z0.d, p0/m, z0.d, z1.d
+; SVE2-NEXT: ret
+entry:
+ %s0s = zext <vscale x 2 x i32> %s0 to <vscale x 2 x i64>
+ %s1s = zext <vscale x 2 x i32> %s1 to <vscale x 2 x i64>
+ %m = add nuw nsw <vscale x 2 x i64> %s0s, %s1s
+ %s = lshr <vscale x 2 x i64> %m, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+ %s2 = trunc <vscale x 2 x i64> %s to <vscale x 2 x i32>
+ ret <vscale x 2 x i32> %s2
+}
+
+define <vscale x 4 x i32> @hadds_v4i32(<vscale x 4 x i32> %s0, <vscale x 4 x i32> %s1) {
+; SVE-LABEL: hadds_v4i32:
+; SVE: // %bb.0: // %entry
+; SVE-NEXT: asr z2.s, z1.s, #1
+; SVE-NEXT: asr z3.s, z0.s, #1
+; SVE-NEXT: and z0.d, z0.d, z1.d
+; SVE-NEXT: add z1.s, z3.s, z2.s
+; SVE-NEXT: and z0.s, z0.s, #0x1
+; SVE-NEXT: add z0.s, z1.s, z0.s
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: hadds_v4i32:
+; SVE2: // %bb.0: // %entry
+; SVE2-NEXT: ptrue p0.s
+; SVE2-NEXT: shadd z0.s, p0/m, z0.s, z1.s
+; SVE2-NEXT: ret
+entry:
+ %s0s = sext <vscale x 4 x i32> %s0 to <vscale x 4 x i64>
+ %s1s = sext <vscale x 4 x i32> %s1 to <vscale x 4 x i64>
+ %m = add nsw <vscale x 4 x i64> %s0s, %s1s
+ %s = ashr <vscale x 4 x i64> %m, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
+ %s2 = trunc <vscale x 4 x i64> %s to <vscale x 4 x i32>
+ ret <vscale x 4 x i32> %s2
+}
+
+define <vscale x 4 x i32> @hadds_v4i32_lsh(<vscale x 4 x i32> %s0, <vscale x 4 x i32> %s1) {
+; SVE-LABEL: hadds_v4i32_lsh:
+; SVE: // %bb.0: // %entry
+; SVE-NEXT: asr z2.s, z1.s, #1
+; SVE-NEXT: asr z3.s, z0.s, #1
+; SVE-NEXT: and z0.d, z0.d, z1.d
+; SVE-NEXT: add z1.s, z3.s, z2.s
+; SVE-NEXT: and z0.s, z0.s, #0x1
+; SVE-NEXT: add z0.s, z1.s, z0.s
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: hadds_v4i32_lsh:
+; SVE2: // %bb.0: // %entry
+; SVE2-NEXT: ptrue p0.s
+; SVE2-NEXT: shadd z0.s, p0/m, z0.s, z1.s
+; SVE2-NEXT: ret
+entry:
+ %s0s = sext <vscale x 4 x i32> %s0 to <vscale x 4 x i64>
+ %s1s = sext <vscale x 4 x i32> %s1 to <vscale x 4 x i64>
+ %m = add nsw <vscale x 4 x i64> %s0s, %s1s
+ %s = lshr <vscale x 4 x i64> %m, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
+ %s2 = trunc <vscale x 4 x i64> %s to <vscale x 4 x i32>
+ ret <vscale x 4 x i32> %s2
+}
+
+define <vscale x 4 x i32> @haddu_v4i32(<vscale x 4 x i32> %s0, <vscale x 4 x i32> %s1) {
+; SVE-LABEL: haddu_v4i32:
+; SVE: // %bb.0: // %entry
+; SVE-NEXT: lsr z2.s, z1.s, #1
+; SVE-NEXT: lsr z3.s, z0.s, #1
+; SVE-NEXT: and z0.d, z0.d, z1.d
+; SVE-NEXT: add z1.s, z3.s, z2.s
+; SVE-NEXT: and z0.s, z0.s, #0x1
+; SVE-NEXT: add z0.s, z1.s, z0.s
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: haddu_v4i32:
+; SVE2: // %bb.0: // %entry
+; SVE2-NEXT: ptrue p0.s
+; SVE2-NEXT: uhadd z0.s, p0/m, z0.s, z1.s
+; SVE2-NEXT: ret
+entry:
+ %s0s = zext <vscale x 4 x i32> %s0 to <vscale x 4 x i64>
+ %s1s = zext <vscale x 4 x i32> %s1 to <vscale x 4 x i64>
+ %m = add nuw nsw <vscale x 4 x i64> %s0s, %s1s
+ %s = lshr <vscale x 4 x i64> %m, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
+ %s2 = trunc <vscale x 4 x i64> %s to <vscale x 4 x i32>
+ ret <vscale x 4 x i32> %s2
+}
+
+define <vscale x 2 x i16> @hadds_v2i16(<vscale x 2 x i16> %s0, <vscale x 2 x i16> %s1) {
+; SVE-LABEL: hadds_v2i16:
+; SVE: // %bb.0: // %entry
+; SVE-NEXT: ptrue p0.d
+; SVE-NEXT: sxth z0.d, p0/m, z0.d
+; SVE-NEXT: sxth z1.d, p0/m, z1.d
+; SVE-NEXT: add z0.d, z0.d, z1.d
+; SVE-NEXT: asr z0.d, z0.d, #1
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: hadds_v2i16:
+; SVE2: // %bb.0: // %entry
+; SVE2-NEXT: ptrue p0.d
+; SVE2-NEXT: sxth z0.d, p0/m, z0.d
+; SVE2-NEXT: sxth z1.d, p0/m, z1.d
+; SVE2-NEXT: shadd z0.d, p0/m, z0.d, z1.d
+; SVE2-NEXT: ret
+entry:
+ %s0s = sext <vscale x 2 x i16> %s0 to <vscale x 2 x i32>
+ %s1s = sext <vscale x 2 x i16> %s1 to <vscale x 2 x i32>
+ %m = add nsw <vscale x 2 x i32> %s0s, %s1s
+ %s = ashr <vscale x 2 x i32> %m, shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 1, i32 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer)
+ %s2 = trunc <vscale x 2 x i32> %s to <vscale x 2 x i16>
+ ret <vscale x 2 x i16> %s2
+}
+
+define <vscale x 2 x i16> @hadds_v2i16_lsh(<vscale x 2 x i16> %s0, <vscale x 2 x i16> %s1) {
+; CHECK-LABEL: hadds_v2i16_lsh:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: sxth z0.d, p0/m, z0.d
+; CHECK-NEXT: sxth z1.d, p0/m, z1.d
+; CHECK-NEXT: add z0.d, z0.d, z1.d
+; CHECK-NEXT: and z0.d, z0.d, #0xffffffff
+; CHECK-NEXT: lsr z0.d, z0.d, #1
+; CHECK-NEXT: ret
+entry:
+ %s0s = sext <vscale x 2 x i16> %s0 to <vscale x 2 x i32>
+ %s1s = sext <vscale x 2 x i16> %s1 to <vscale x 2 x i32>
+ %m = add nsw <vscale x 2 x i32> %s0s, %s1s
+ %s = lshr <vscale x 2 x i32> %m, shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 1, i32 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer)
+ %s2 = trunc <vscale x 2 x i32> %s to <vscale x 2 x i16>
+ ret <vscale x 2 x i16> %s2
+}
+
+define <vscale x 2 x i16> @haddu_v2i16(<vscale x 2 x i16> %s0, <vscale x 2 x i16> %s1) {
+; SVE-LABEL: haddu_v2i16:
+; SVE: // %bb.0: // %entry
+; SVE-NEXT: and z0.d, z0.d, #0xffff
+; SVE-NEXT: and z1.d, z1.d, #0xffff
+; SVE-NEXT: add z0.d, z0.d, z1.d
+; SVE-NEXT: lsr z0.d, z0.d, #1
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: haddu_v2i16:
+; SVE2: // %bb.0: // %entry
+; SVE2-NEXT: ptrue p0.d
+; SVE2-NEXT: and z0.d, z0.d, #0xffff
+; SVE2-NEXT: and z1.d, z1.d, #0xffff
+; SVE2-NEXT: uhadd z0.d, p0/m, z0.d, z1.d
+; SVE2-NEXT: ret
+entry:
+ %s0s = zext <vscale x 2 x i16> %s0 to <vscale x 2 x i32>
+ %s1s = zext <vscale x 2 x i16> %s1 to <vscale x 2 x i32>
+ %m = add nuw nsw <vscale x 2 x i32> %s0s, %s1s
+ %s = lshr <vscale x 2 x i32> %m, shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 1, i32 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer)
+ %s2 = trunc <vscale x 2 x i32> %s to <vscale x 2 x i16>
+ ret <vscale x 2 x i16> %s2
+}
+
+define <vscale x 4 x i16> @hadds_v4i16(<vscale x 4 x i16> %s0, <vscale x 4 x i16> %s1) {
+; SVE-LABEL: hadds_v4i16:
+; SVE: // %bb.0: // %entry
+; SVE-NEXT: ptrue p0.s
+; SVE-NEXT: sxth z0.s, p0/m, z0.s
+; SVE-NEXT: sxth z1.s, p0/m, z1.s
+; SVE-NEXT: add z0.s, z0.s, z1.s
+; SVE-NEXT: asr z0.s, z0.s, #1
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: hadds_v4i16:
+; SVE2: // %bb.0: // %entry
+; SVE2-NEXT: ptrue p0.s
+; SVE2-NEXT: sxth z0.s, p0/m, z0.s
+; SVE2-NEXT: sxth z1.s, p0/m, z1.s
+; SVE2-NEXT: shadd z0.s, p0/m, z0.s, z1.s
+; SVE2-NEXT: ret
+entry:
+ %s0s = sext <vscale x 4 x i16> %s0 to <vscale x 4 x i32>
+ %s1s = sext <vscale x 4 x i16> %s1 to <vscale x 4 x i32>
+ %m = add nsw <vscale x 4 x i32> %s0s, %s1s
+ %s = ashr <vscale x 4 x i32> %m, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %s2 = trunc <vscale x 4 x i32> %s to <vscale x 4 x i16>
+ ret <vscale x 4 x i16> %s2
+}
+
+define <vscale x 4 x i16> @hadds_v4i16_lsh(<vscale x 4 x i16> %s0, <vscale x 4 x i16> %s1) {
+; CHECK-LABEL: hadds_v4i16_lsh:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: sxth z0.s, p0/m, z0.s
+; CHECK-NEXT: sxth z1.s, p0/m, z1.s
+; CHECK-NEXT: add z0.s, z0.s, z1.s
+; CHECK-NEXT: lsr z0.s, z0.s, #1
+; CHECK-NEXT: ret
+entry:
+ %s0s = sext <vscale x 4 x i16> %s0 to <vscale x 4 x i32>
+ %s1s = sext <vscale x 4 x i16> %s1 to <vscale x 4 x i32>
+ %m = add nsw <vscale x 4 x i32> %s0s, %s1s
+ %s = lshr <vscale x 4 x i32> %m, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %s2 = trunc <vscale x 4 x i32> %s to <vscale x 4 x i16>
+ ret <vscale x 4 x i16> %s2
+}
+
+define <vscale x 4 x i16> @haddu_v4i16(<vscale x 4 x i16> %s0, <vscale x 4 x i16> %s1) {
+; SVE-LABEL: haddu_v4i16:
+; SVE: // %bb.0: // %entry
+; SVE-NEXT: and z0.s, z0.s, #0xffff
+; SVE-NEXT: and z1.s, z1.s, #0xffff
+; SVE-NEXT: add z0.s, z0.s, z1.s
+; SVE-NEXT: lsr z0.s, z0.s, #1
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: haddu_v4i16:
+; SVE2: // %bb.0: // %entry
+; SVE2-NEXT: ptrue p0.s
+; SVE2-NEXT: and z0.s, z0.s, #0xffff
+; SVE2-NEXT: and z1.s, z1.s, #0xffff
+; SVE2-NEXT: uhadd z0.s, p0/m, z0.s, z1.s
+; SVE2-NEXT: ret
+entry:
+ %s0s = zext <vscale x 4 x i16> %s0 to <vscale x 4 x i32>
+ %s1s = zext <vscale x 4 x i16> %s1 to <vscale x 4 x i32>
+ %m = add nuw nsw <vscale x 4 x i32> %s0s, %s1s
+ %s = lshr <vscale x 4 x i32> %m, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %s2 = trunc <vscale x 4 x i32> %s to <vscale x 4 x i16>
+ ret <vscale x 4 x i16> %s2
+}
+
+define <vscale x 8 x i16> @hadds_v8i16(<vscale x 8 x i16> %s0, <vscale x 8 x i16> %s1) {
+; SVE-LABEL: hadds_v8i16:
+; SVE: // %bb.0: // %entry
+; SVE-NEXT: asr z2.h, z1.h, #1
+; SVE-NEXT: asr z3.h, z0.h, #1
+; SVE-NEXT: and z0.d, z0.d, z1.d
+; SVE-NEXT: add z1.h, z3.h, z2.h
+; SVE-NEXT: and z0.h, z0.h, #0x1
+; SVE-NEXT: add z0.h, z1.h, z0.h
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: hadds_v8i16:
+; SVE2: // %bb.0: // %entry
+; SVE2-NEXT: ptrue p0.h
+; SVE2-NEXT: shadd z0.h, p0/m, z0.h, z1.h
+; SVE2-NEXT: ret
+entry:
+ %s0s = sext <vscale x 8 x i16> %s0 to <vscale x 8 x i32>
+ %s1s = sext <vscale x 8 x i16> %s1 to <vscale x 8 x i32>
+ %m = add nsw <vscale x 8 x i32> %s0s, %s1s
+ %s = ashr <vscale x 8 x i32> %m, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 1, i32 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
+ %s2 = trunc <vscale x 8 x i32> %s to <vscale x 8 x i16>
+ ret <vscale x 8 x i16> %s2
+}
+
+define <vscale x 8 x i16> @hadds_v8i16_lsh(<vscale x 8 x i16> %s0, <vscale x 8 x i16> %s1) {
+; SVE-LABEL: hadds_v8i16_lsh:
+; SVE: // %bb.0: // %entry
+; SVE-NEXT: asr z2.h, z1.h, #1
+; SVE-NEXT: asr z3.h, z0.h, #1
+; SVE-NEXT: and z0.d, z0.d, z1.d
+; SVE-NEXT: add z1.h, z3.h, z2.h
+; SVE-NEXT: and z0.h, z0.h, #0x1
+; SVE-NEXT: add z0.h, z1.h, z0.h
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: hadds_v8i16_lsh:
+; SVE2: // %bb.0: // %entry
+; SVE2-NEXT: ptrue p0.h
+; SVE2-NEXT: shadd z0.h, p0/m, z0.h, z1.h
+; SVE2-NEXT: ret
+entry:
+ %s0s = sext <vscale x 8 x i16> %s0 to <vscale x 8 x i32>
+ %s1s = sext <vscale x 8 x i16> %s1 to <vscale x 8 x i32>
+ %m = add nsw <vscale x 8 x i32> %s0s, %s1s
+ %s = lshr <vscale x 8 x i32> %m, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 1, i32 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
+ %s2 = trunc <vscale x 8 x i32> %s to <vscale x 8 x i16>
+ ret <vscale x 8 x i16> %s2
+}
+
+define <vscale x 8 x i16> @haddu_v8i16(<vscale x 8 x i16> %s0, <vscale x 8 x i16> %s1) {
+; SVE-LABEL: haddu_v8i16:
+; SVE: // %bb.0: // %entry
+; SVE-NEXT: lsr z2.h, z1.h, #1
+; SVE-NEXT: lsr z3.h, z0.h, #1
+; SVE-NEXT: and z0.d, z0.d, z1.d
+; SVE-NEXT: add z1.h, z3.h, z2.h
+; SVE-NEXT: and z0.h, z0.h, #0x1
+; SVE-NEXT: add z0.h, z1.h, z0.h
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: haddu_v8i16:
+; SVE2: // %bb.0: // %entry
+; SVE2-NEXT: ptrue p0.h
+; SVE2-NEXT: uhadd z0.h, p0/m, z0.h, z1.h
+; SVE2-NEXT: ret
+entry:
+ %s0s = zext <vscale x 8 x i16> %s0 to <vscale x 8 x i32>
+ %s1s = zext <vscale x 8 x i16> %s1 to <vscale x 8 x i32>
+ %m = add nuw nsw <vscale x 8 x i32> %s0s, %s1s
+ %s = lshr <vscale x 8 x i32> %m, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 1, i32 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
+ %s2 = trunc <vscale x 8 x i32> %s to <vscale x 8 x i16>
+ ret <vscale x 8 x i16> %s2
+}
+
+define <vscale x 4 x i8> @hadds_v4i8(<vscale x 4 x i8> %s0, <vscale x 4 x i8> %s1) {
+; SVE-LABEL: hadds_v4i8:
+; SVE: // %bb.0: // %entry
+; SVE-NEXT: ptrue p0.s
+; SVE-NEXT: sxtb z0.s, p0/m, z0.s
+; SVE-NEXT: sxtb z1.s, p0/m, z1.s
+; SVE-NEXT: add z0.s, z0.s, z1.s
+; SVE-NEXT: asr z0.s, z0.s, #1
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: hadds_v4i8:
+; SVE2: // %bb.0: // %entry
+; SVE2-NEXT: ptrue p0.s
+; SVE2-NEXT: sxtb z0.s, p0/m, z0.s
+; SVE2-NEXT: sxtb z1.s, p0/m, z1.s
+; SVE2-NEXT: shadd z0.s, p0/m, z0.s, z1.s
+; SVE2-NEXT: ret
+entry:
+ %s0s = sext <vscale x 4 x i8> %s0 to <vscale x 4 x i16>
+ %s1s = sext <vscale x 4 x i8> %s1 to <vscale x 4 x i16>
+ %m = add nsw <vscale x 4 x i16> %s0s, %s1s
+ %s = ashr <vscale x 4 x i16> %m, shufflevector (<vscale x 4 x i16> insertelement (<vscale x 4 x i16> poison, i16 1, i32 0), <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer)
+ %s2 = trunc <vscale x 4 x i16> %s to <vscale x 4 x i8>
+ ret <vscale x 4 x i8> %s2
+}
+
+define <vscale x 4 x i8> @hadds_v4i8_lsh(<vscale x 4 x i8> %s0, <vscale x 4 x i8> %s1) {
+; CHECK-LABEL: hadds_v4i8_lsh:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: sxtb z0.s, p0/m, z0.s
+; CHECK-NEXT: sxtb z1.s, p0/m, z1.s
+; CHECK-NEXT: add z0.s, z0.s, z1.s
+; CHECK-NEXT: and z0.s, z0.s, #0xffff
+; CHECK-NEXT: lsr z0.s, z0.s, #1
+; CHECK-NEXT: ret
+entry:
+ %s0s = sext <vscale x 4 x i8> %s0 to <vscale x 4 x i16>
+ %s1s = sext <vscale x 4 x i8> %s1 to <vscale x 4 x i16>
+ %m = add nsw <vscale x 4 x i16> %s0s, %s1s
+ %s = lshr <vscale x 4 x i16> %m, shufflevector (<vscale x 4 x i16> insertelement (<vscale x 4 x i16> poison, i16 1, i32 0), <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer)
+ %s2 = trunc <vscale x 4 x i16> %s to <vscale x 4 x i8>
+ ret <vscale x 4 x i8> %s2
+}
+
+define <vscale x 4 x i8> @haddu_v4i8(<vscale x 4 x i8> %s0, <vscale x 4 x i8> %s1) {
+; SVE-LABEL: haddu_v4i8:
+; SVE: // %bb.0: // %entry
+; SVE-NEXT: and z0.s, z0.s, #0xff
+; SVE-NEXT: and z1.s, z1.s, #0xff
+; SVE-NEXT: add z0.s, z0.s, z1.s
+; SVE-NEXT: lsr z0.s, z0.s, #1
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: haddu_v4i8:
+; SVE2: // %bb.0: // %entry
+; SVE2-NEXT: ptrue p0.s
+; SVE2-NEXT: and z0.s, z0.s, #0xff
+; SVE2-NEXT: and z1.s, z1.s, #0xff
+; SVE2-NEXT: uhadd z0.s, p0/m, z0.s, z1.s
+; SVE2-NEXT: ret
+entry:
+ %s0s = zext <vscale x 4 x i8> %s0 to <vscale x 4 x i16>
+ %s1s = zext <vscale x 4 x i8> %s1 to <vscale x 4 x i16>
+ %m = add nuw nsw <vscale x 4 x i16> %s0s, %s1s
+ %s = lshr <vscale x 4 x i16> %m, shufflevector (<vscale x 4 x i16> insertelement (<vscale x 4 x i16> poison, i16 1, i32 0), <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer)
+ %s2 = trunc <vscale x 4 x i16> %s to <vscale x 4 x i8>
+ ret <vscale x 4 x i8> %s2
+}
+
+define <vscale x 8 x i8> @hadds_v8i8(<vscale x 8 x i8> %s0, <vscale x 8 x i8> %s1) {
+; SVE-LABEL: hadds_v8i8:
+; SVE: // %bb.0: // %entry
+; SVE-NEXT: ptrue p0.h
+; SVE-NEXT: sxtb z0.h, p0/m, z0.h
+; SVE-NEXT: sxtb z1.h, p0/m, z1.h
+; SVE-NEXT: add z0.h, z0.h, z1.h
+; SVE-NEXT: asr z0.h, z0.h, #1
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: hadds_v8i8:
+; SVE2: // %bb.0: // %entry
+; SVE2-NEXT: ptrue p0.h
+; SVE2-NEXT: sxtb z0.h, p0/m, z0.h
+; SVE2-NEXT: sxtb z1.h, p0/m, z1.h
+; SVE2-NEXT: shadd z0.h, p0/m, z0.h, z1.h
+; SVE2-NEXT: ret
+entry:
+ %s0s = sext <vscale x 8 x i8> %s0 to <vscale x 8 x i16>
+ %s1s = sext <vscale x 8 x i8> %s1 to <vscale x 8 x i16>
+ %m = add nsw <vscale x 8 x i16> %s0s, %s1s
+ %s = ashr <vscale x 8 x i16> %m, shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 1, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)
+ %s2 = trunc <vscale x 8 x i16> %s to <vscale x 8 x i8>
+ ret <vscale x 8 x i8> %s2
+}
+
+define <vscale x 8 x i8> @hadds_v8i8_lsh(<vscale x 8 x i8> %s0, <vscale x 8 x i8> %s1) {
+; CHECK-LABEL: hadds_v8i8_lsh:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: sxtb z0.h, p0/m, z0.h
+; CHECK-NEXT: sxtb z1.h, p0/m, z1.h
+; CHECK-NEXT: add z0.h, z0.h, z1.h
+; CHECK-NEXT: lsr z0.h, z0.h, #1
+; CHECK-NEXT: ret
+entry:
+ %s0s = sext <vscale x 8 x i8> %s0 to <vscale x 8 x i16>
+ %s1s = sext <vscale x 8 x i8> %s1 to <vscale x 8 x i16>
+ %m = add nsw <vscale x 8 x i16> %s0s, %s1s
+ %s = lshr <vscale x 8 x i16> %m, shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 1, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)
+ %s2 = trunc <vscale x 8 x i16> %s to <vscale x 8 x i8>
+ ret <vscale x 8 x i8> %s2
+}
+
+define <vscale x 8 x i8> @haddu_v8i8(<vscale x 8 x i8> %s0, <vscale x 8 x i8> %s1) {
+; SVE-LABEL: haddu_v8i8:
+; SVE: // %bb.0: // %entry
+; SVE-NEXT: and z0.h, z0.h, #0xff
+; SVE-NEXT: and z1.h, z1.h, #0xff
+; SVE-NEXT: add z0.h, z0.h, z1.h
+; SVE-NEXT: lsr z0.h, z0.h, #1
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: haddu_v8i8:
+; SVE2: // %bb.0: // %entry
+; SVE2-NEXT: ptrue p0.h
+; SVE2-NEXT: and z0.h, z0.h, #0xff
+; SVE2-NEXT: and z1.h, z1.h, #0xff
+; SVE2-NEXT: uhadd z0.h, p0/m, z0.h, z1.h
+; SVE2-NEXT: ret
+entry:
+ %s0s = zext <vscale x 8 x i8> %s0 to <vscale x 8 x i16>
+ %s1s = zext <vscale x 8 x i8> %s1 to <vscale x 8 x i16>
+ %m = add nuw nsw <vscale x 8 x i16> %s0s, %s1s
+ %s = lshr <vscale x 8 x i16> %m, shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 1, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)
+ %s2 = trunc <vscale x 8 x i16> %s to <vscale x 8 x i8>
+ ret <vscale x 8 x i8> %s2
+}
+
+define <vscale x 16 x i8> @hadds_v16i8(<vscale x 16 x i8> %s0, <vscale x 16 x i8> %s1) {
+; SVE-LABEL: hadds_v16i8:
+; SVE: // %bb.0: // %entry
+; SVE-NEXT: asr z2.b, z1.b, #1
+; SVE-NEXT: asr z3.b, z0.b, #1
+; SVE-NEXT: and z0.d, z0.d, z1.d
+; SVE-NEXT: add z1.b, z3.b, z2.b
+; SVE-NEXT: and z0.b, z0.b, #0x1
+; SVE-NEXT: add z0.b, z1.b, z0.b
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: hadds_v16i8:
+; SVE2: // %bb.0: // %entry
+; SVE2-NEXT: ptrue p0.b
+; SVE2-NEXT: shadd z0.b, p0/m, z0.b, z1.b
+; SVE2-NEXT: ret
+entry:
+ %s0s = sext <vscale x 16 x i8> %s0 to <vscale x 16 x i16>
+ %s1s = sext <vscale x 16 x i8> %s1 to <vscale x 16 x i16>
+ %m = add nsw <vscale x 16 x i16> %s0s, %s1s
+ %s = ashr <vscale x 16 x i16> %m, shufflevector (<vscale x 16 x i16> insertelement (<vscale x 16 x i16> poison, i16 1, i32 0), <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer)
+ %s2 = trunc <vscale x 16 x i16> %s to <vscale x 16 x i8>
+ ret <vscale x 16 x i8> %s2
+}
+
+define <vscale x 16 x i8> @hadds_v16i8_lsh(<vscale x 16 x i8> %s0, <vscale x 16 x i8> %s1) {
+; SVE-LABEL: hadds_v16i8_lsh:
+; SVE: // %bb.0: // %entry
+; SVE-NEXT: asr z2.b, z1.b, #1
+; SVE-NEXT: asr z3.b, z0.b, #1
+; SVE-NEXT: and z0.d, z0.d, z1.d
+; SVE-NEXT: add z1.b, z3.b, z2.b
+; SVE-NEXT: and z0.b, z0.b, #0x1
+; SVE-NEXT: add z0.b, z1.b, z0.b
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: hadds_v16i8_lsh:
+; SVE2: // %bb.0: // %entry
+; SVE2-NEXT: ptrue p0.b
+; SVE2-NEXT: shadd z0.b, p0/m, z0.b, z1.b
+; SVE2-NEXT: ret
+entry:
+ %s0s = sext <vscale x 16 x i8> %s0 to <vscale x 16 x i16>
+ %s1s = sext <vscale x 16 x i8> %s1 to <vscale x 16 x i16>
+ %m = add nsw <vscale x 16 x i16> %s0s, %s1s
+ %s = lshr <vscale x 16 x i16> %m, shufflevector (<vscale x 16 x i16> insertelement (<vscale x 16 x i16> poison, i16 1, i32 0), <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer)
+ %s2 = trunc <vscale x 16 x i16> %s to <vscale x 16 x i8>
+ ret <vscale x 16 x i8> %s2
+}
+
+define <vscale x 16 x i8> @haddu_v16i8(<vscale x 16 x i8> %s0, <vscale x 16 x i8> %s1) {
+; SVE-LABEL: haddu_v16i8:
+; SVE: // %bb.0: // %entry
+; SVE-NEXT: lsr z2.b, z1.b, #1
+; SVE-NEXT: lsr z3.b, z0.b, #1
+; SVE-NEXT: and z0.d, z0.d, z1.d
+; SVE-NEXT: add z1.b, z3.b, z2.b
+; SVE-NEXT: and z0.b, z0.b, #0x1
+; SVE-NEXT: add z0.b, z1.b, z0.b
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: haddu_v16i8:
+; SVE2: // %bb.0: // %entry
+; SVE2-NEXT: ptrue p0.b
+; SVE2-NEXT: uhadd z0.b, p0/m, z0.b, z1.b
+; SVE2-NEXT: ret
+entry:
+ %s0s = zext <vscale x 16 x i8> %s0 to <vscale x 16 x i16>
+ %s1s = zext <vscale x 16 x i8> %s1 to <vscale x 16 x i16>
+ %m = add nuw nsw <vscale x 16 x i16> %s0s, %s1s
+ %s = lshr <vscale x 16 x i16> %m, shufflevector (<vscale x 16 x i16> insertelement (<vscale x 16 x i16> poison, i16 1, i32 0), <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer)
+ %s2 = trunc <vscale x 16 x i16> %s to <vscale x 16 x i8>
+ ret <vscale x 16 x i8> %s2
+}
+
+define <vscale x 2 x i64> @rhadds_v2i64(<vscale x 2 x i64> %s0, <vscale x 2 x i64> %s1) {
+; SVE-LABEL: rhadds_v2i64:
+; SVE: // %bb.0: // %entry
+; SVE-NEXT: asr z2.d, z1.d, #1
+; SVE-NEXT: asr z3.d, z0.d, #1
+; SVE-NEXT: orr z0.d, z0.d, z1.d
+; SVE-NEXT: add z1.d, z3.d, z2.d
+; SVE-NEXT: and z0.d, z0.d, #0x1
+; SVE-NEXT: add z0.d, z1.d, z0.d
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: rhadds_v2i64:
+; SVE2: // %bb.0: // %entry
+; SVE2-NEXT: ptrue p0.d
+; SVE2-NEXT: srhadd z0.d, p0/m, z0.d, z1.d
+; SVE2-NEXT: ret
+entry:
+ %s0s = sext <vscale x 2 x i64> %s0 to <vscale x 2 x i128>
+ %s1s = sext <vscale x 2 x i64> %s1 to <vscale x 2 x i128>
+ %add = add <vscale x 2 x i128> %s0s, shufflevector (<vscale x 2 x i128> insertelement (<vscale x 2 x i128> poison, i128 1, i32 0), <vscale x 2 x i128> poison, <vscale x 2 x i32> zeroinitializer)
+ %add2 = add <vscale x 2 x i128> %add, %s1s
+ %s = ashr <vscale x 2 x i128> %add2, shufflevector (<vscale x 2 x i128> insertelement (<vscale x 2 x i128> poison, i128 1, i32 0), <vscale x 2 x i128> poison, <vscale x 2 x i32> zeroinitializer)
+ %result = trunc <vscale x 2 x i128> %s to <vscale x 2 x i64>
+ ret <vscale x 2 x i64> %result
+}
+
+define <vscale x 2 x i64> @rhadds_v2i64_lsh(<vscale x 2 x i64> %s0, <vscale x 2 x i64> %s1) {
+; SVE-LABEL: rhadds_v2i64_lsh:
+; SVE: // %bb.0: // %entry
+; SVE-NEXT: asr z2.d, z1.d, #1
+; SVE-NEXT: asr z3.d, z0.d, #1
+; SVE-NEXT: orr z0.d, z0.d, z1.d
+; SVE-NEXT: add z1.d, z3.d, z2.d
+; SVE-NEXT: and z0.d, z0.d, #0x1
+; SVE-NEXT: add z0.d, z1.d, z0.d
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: rhadds_v2i64_lsh:
+; SVE2: // %bb.0: // %entry
+; SVE2-NEXT: ptrue p0.d
+; SVE2-NEXT: srhadd z0.d, p0/m, z0.d, z1.d
+; SVE2-NEXT: ret
+entry:
+ %s0s = sext <vscale x 2 x i64> %s0 to <vscale x 2 x i128>
+ %s1s = sext <vscale x 2 x i64> %s1 to <vscale x 2 x i128>
+ %add = add <vscale x 2 x i128> %s0s, shufflevector (<vscale x 2 x i128> insertelement (<vscale x 2 x i128> poison, i128 1, i32 0), <vscale x 2 x i128> poison, <vscale x 2 x i32> zeroinitializer)
+ %add2 = add <vscale x 2 x i128> %add, %s1s
+ %s = lshr <vscale x 2 x i128> %add2, shufflevector (<vscale x 2 x i128> insertelement (<vscale x 2 x i128> poison, i128 1, i32 0), <vscale x 2 x i128> poison, <vscale x 2 x i32> zeroinitializer)
+ %result = trunc <vscale x 2 x i128> %s to <vscale x 2 x i64>
+ ret <vscale x 2 x i64> %result
+}
+
+define <vscale x 2 x i64> @rhaddu_v2i64(<vscale x 2 x i64> %s0, <vscale x 2 x i64> %s1) {
+; SVE-LABEL: rhaddu_v2i64:
+; SVE: // %bb.0: // %entry
+; SVE-NEXT: lsr z2.d, z1.d, #1
+; SVE-NEXT: lsr z3.d, z0.d, #1
+; SVE-NEXT: orr z0.d, z0.d, z1.d
+; SVE-NEXT: add z1.d, z3.d, z2.d
+; SVE-NEXT: and z0.d, z0.d, #0x1
+; SVE-NEXT: add z0.d, z1.d, z0.d
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: rhaddu_v2i64:
+; SVE2: // %bb.0: // %entry
+; SVE2-NEXT: ptrue p0.d
+; SVE2-NEXT: urhadd z0.d, p0/m, z0.d, z1.d
+; SVE2-NEXT: ret
+entry:
+ %s0s = zext <vscale x 2 x i64> %s0 to <vscale x 2 x i128>
+ %s1s = zext <vscale x 2 x i64> %s1 to <vscale x 2 x i128>
+ %add = add nuw nsw <vscale x 2 x i128> %s0s, shufflevector (<vscale x 2 x i128> insertelement (<vscale x 2 x i128> poison, i128 1, i32 0), <vscale x 2 x i128> poison, <vscale x 2 x i32> zeroinitializer)
+ %add2 = add nuw nsw <vscale x 2 x i128> %add, %s1s
+ %s = lshr <vscale x 2 x i128> %add2, shufflevector (<vscale x 2 x i128> insertelement (<vscale x 2 x i128> poison, i128 1, i32 0), <vscale x 2 x i128> poison, <vscale x 2 x i32> zeroinitializer)
+ %result = trunc <vscale x 2 x i128> %s to <vscale x 2 x i64>
+ ret <vscale x 2 x i64> %result
+}
+
+define <vscale x 2 x i32> @rhadds_v2i32(<vscale x 2 x i32> %s0, <vscale x 2 x i32> %s1) {
+; CHECK-LABEL: rhadds_v2i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: mov z2.d, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: sxtw z0.d, p0/m, z0.d
+; CHECK-NEXT: sxtw z1.d, p0/m, z1.d
+; CHECK-NEXT: eor z0.d, z0.d, z2.d
+; CHECK-NEXT: sub z0.d, z1.d, z0.d
+; CHECK-NEXT: asr z0.d, z0.d, #1
+; CHECK-NEXT: ret
+entry:
+ %s0s = sext <vscale x 2 x i32> %s0 to <vscale x 2 x i64>
+ %s1s = sext <vscale x 2 x i32> %s1 to <vscale x 2 x i64>
+ %add = add <vscale x 2 x i64> %s0s, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+ %add2 = add <vscale x 2 x i64> %add, %s1s
+ %s = ashr <vscale x 2 x i64> %add2, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+ %result = trunc <vscale x 2 x i64> %s to <vscale x 2 x i32>
+ ret <vscale x 2 x i32> %result
+}
+
+define <vscale x 2 x i32> @rhadds_v2i32_lsh(<vscale x 2 x i32> %s0, <vscale x 2 x i32> %s1) {
+; CHECK-LABEL: rhadds_v2i32_lsh:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: mov z2.d, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: sxtw z0.d, p0/m, z0.d
+; CHECK-NEXT: sxtw z1.d, p0/m, z1.d
+; CHECK-NEXT: eor z0.d, z0.d, z2.d
+; CHECK-NEXT: sub z0.d, z1.d, z0.d
+; CHECK-NEXT: lsr z0.d, z0.d, #1
+; CHECK-NEXT: ret
+entry:
+ %s0s = sext <vscale x 2 x i32> %s0 to <vscale x 2 x i64>
+ %s1s = sext <vscale x 2 x i32> %s1 to <vscale x 2 x i64>
+ %add = add <vscale x 2 x i64> %s0s, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+ %add2 = add <vscale x 2 x i64> %add, %s1s
+ %s = lshr <vscale x 2 x i64> %add2, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+ %result = trunc <vscale x 2 x i64> %s to <vscale x 2 x i32>
+ ret <vscale x 2 x i32> %result
+}
+
+define <vscale x 2 x i32> @rhaddu_v2i32(<vscale x 2 x i32> %s0, <vscale x 2 x i32> %s1) {
+; SVE-LABEL: rhaddu_v2i32:
+; SVE: // %bb.0: // %entry
+; SVE-NEXT: mov z2.d, #-1 // =0xffffffffffffffff
+; SVE-NEXT: and z0.d, z0.d, #0xffffffff
+; SVE-NEXT: and z1.d, z1.d, #0xffffffff
+; SVE-NEXT: eor z0.d, z0.d, z2.d
+; SVE-NEXT: sub z0.d, z1.d, z0.d
+; SVE-NEXT: lsr z0.d, z0.d, #1
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: rhaddu_v2i32:
+; SVE2: // %bb.0: // %entry
+; SVE2-NEXT: ptrue p0.d
+; SVE2-NEXT: and z0.d, z0.d, #0xffffffff
+; SVE2-NEXT: and z1.d, z1.d, #0xffffffff
+; SVE2-NEXT: urhadd z0.d, p0/m, z0.d, z1.d
+; SVE2-NEXT: ret
+entry:
+ %s0s = zext <vscale x 2 x i32> %s0 to <vscale x 2 x i64>
+ %s1s = zext <vscale x 2 x i32> %s1 to <vscale x 2 x i64>
+ %add = add nuw nsw <vscale x 2 x i64> %s0s, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+ %add2 = add nuw nsw <vscale x 2 x i64> %add, %s1s
+ %s = lshr <vscale x 2 x i64> %add2, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+ %result = trunc <vscale x 2 x i64> %s to <vscale x 2 x i32>
+ ret <vscale x 2 x i32> %result
+}
+
+define <vscale x 4 x i32> @rhadds_v4i32(<vscale x 4 x i32> %s0, <vscale x 4 x i32> %s1) {
+; SVE-LABEL: rhadds_v4i32:
+; SVE: // %bb.0: // %entry
+; SVE-NEXT: asr z2.s, z1.s, #1
+; SVE-NEXT: asr z3.s, z0.s, #1
+; SVE-NEXT: orr z0.d, z0.d, z1.d
+; SVE-NEXT: add z1.s, z3.s, z2.s
+; SVE-NEXT: and z0.s, z0.s, #0x1
+; SVE-NEXT: add z0.s, z1.s, z0.s
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: rhadds_v4i32:
+; SVE2: // %bb.0: // %entry
+; SVE2-NEXT: ptrue p0.s
+; SVE2-NEXT: srhadd z0.s, p0/m, z0.s, z1.s
+; SVE2-NEXT: ret
+entry:
+ %s0s = sext <vscale x 4 x i32> %s0 to <vscale x 4 x i64>
+ %s1s = sext <vscale x 4 x i32> %s1 to <vscale x 4 x i64>
+ %add = add <vscale x 4 x i64> %s0s, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
+ %add2 = add <vscale x 4 x i64> %add, %s1s
+ %s = ashr <vscale x 4 x i64> %add2, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
+ %result = trunc <vscale x 4 x i64> %s to <vscale x 4 x i32>
+ ret <vscale x 4 x i32> %result
+}
+
+define <vscale x 4 x i32> @rhadds_v4i32_lsh(<vscale x 4 x i32> %s0, <vscale x 4 x i32> %s1) {
+; SVE-LABEL: rhadds_v4i32_lsh:
+; SVE: // %bb.0: // %entry
+; SVE-NEXT: asr z2.s, z1.s, #1
+; SVE-NEXT: asr z3.s, z0.s, #1
+; SVE-NEXT: orr z0.d, z0.d, z1.d
+; SVE-NEXT: add z1.s, z3.s, z2.s
+; SVE-NEXT: and z0.s, z0.s, #0x1
+; SVE-NEXT: add z0.s, z1.s, z0.s
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: rhadds_v4i32_lsh:
+; SVE2: // %bb.0: // %entry
+; SVE2-NEXT: ptrue p0.s
+; SVE2-NEXT: srhadd z0.s, p0/m, z0.s, z1.s
+; SVE2-NEXT: ret
+entry:
+ %s0s = sext <vscale x 4 x i32> %s0 to <vscale x 4 x i64>
+ %s1s = sext <vscale x 4 x i32> %s1 to <vscale x 4 x i64>
+ %add = add <vscale x 4 x i64> %s0s, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
+ %add2 = add <vscale x 4 x i64> %add, %s1s
+ %s = lshr <vscale x 4 x i64> %add2, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
+ %result = trunc <vscale x 4 x i64> %s to <vscale x 4 x i32>
+ ret <vscale x 4 x i32> %result
+}
+
+define <vscale x 4 x i32> @rhaddu_v4i32(<vscale x 4 x i32> %s0, <vscale x 4 x i32> %s1) {
+; SVE-LABEL: rhaddu_v4i32:
+; SVE: // %bb.0: // %entry
+; SVE-NEXT: lsr z2.s, z1.s, #1
+; SVE-NEXT: lsr z3.s, z0.s, #1
+; SVE-NEXT: orr z0.d, z0.d, z1.d
+; SVE-NEXT: add z1.s, z3.s, z2.s
+; SVE-NEXT: and z0.s, z0.s, #0x1
+; SVE-NEXT: add z0.s, z1.s, z0.s
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: rhaddu_v4i32:
+; SVE2: // %bb.0: // %entry
+; SVE2-NEXT: ptrue p0.s
+; SVE2-NEXT: urhadd z0.s, p0/m, z0.s, z1.s
+; SVE2-NEXT: ret
+entry:
+ %s0s = zext <vscale x 4 x i32> %s0 to <vscale x 4 x i64>
+ %s1s = zext <vscale x 4 x i32> %s1 to <vscale x 4 x i64>
+ %add = add nuw nsw <vscale x 4 x i64> %s0s, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
+ %add2 = add nuw nsw <vscale x 4 x i64> %add, %s1s
+ %s = lshr <vscale x 4 x i64> %add2, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
+ %result = trunc <vscale x 4 x i64> %s to <vscale x 4 x i32>
+ ret <vscale x 4 x i32> %result
+}
+
+define <vscale x 2 x i16> @rhadds_v2i16(<vscale x 2 x i16> %s0, <vscale x 2 x i16> %s1) {
+; CHECK-LABEL: rhadds_v2i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: mov z2.d, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: sxth z0.d, p0/m, z0.d
+; CHECK-NEXT: sxth z1.d, p0/m, z1.d
+; CHECK-NEXT: eor z0.d, z0.d, z2.d
+; CHECK-NEXT: sub z0.d, z1.d, z0.d
+; CHECK-NEXT: asr z0.d, z0.d, #1
+; CHECK-NEXT: ret
+entry:
+ %s0s = sext <vscale x 2 x i16> %s0 to <vscale x 2 x i32>
+ %s1s = sext <vscale x 2 x i16> %s1 to <vscale x 2 x i32>
+ %add = add <vscale x 2 x i32> %s0s, shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 1, i32 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer)
+ %add2 = add <vscale x 2 x i32> %add, %s1s
+ %s = ashr <vscale x 2 x i32> %add2, shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 1, i32 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer)
+ %result = trunc <vscale x 2 x i32> %s to <vscale x 2 x i16>
+ ret <vscale x 2 x i16> %result
+}
+
+define <vscale x 2 x i16> @rhadds_v2i16_lsh(<vscale x 2 x i16> %s0, <vscale x 2 x i16> %s1) {
+; CHECK-LABEL: rhadds_v2i16_lsh:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: mov z2.d, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: sxth z0.d, p0/m, z0.d
+; CHECK-NEXT: sxth z1.d, p0/m, z1.d
+; CHECK-NEXT: eor z0.d, z0.d, z2.d
+; CHECK-NEXT: sub z0.d, z1.d, z0.d
+; CHECK-NEXT: and z0.d, z0.d, #0xffffffff
+; CHECK-NEXT: lsr z0.d, z0.d, #1
+; CHECK-NEXT: ret
+entry:
+ %s0s = sext <vscale x 2 x i16> %s0 to <vscale x 2 x i32>
+ %s1s = sext <vscale x 2 x i16> %s1 to <vscale x 2 x i32>
+ %add = add <vscale x 2 x i32> %s0s, shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 1, i32 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer)
+ %add2 = add <vscale x 2 x i32> %add, %s1s
+ %s = lshr <vscale x 2 x i32> %add2, shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 1, i32 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer)
+ %result = trunc <vscale x 2 x i32> %s to <vscale x 2 x i16>
+ ret <vscale x 2 x i16> %result
+}
+
+define <vscale x 2 x i16> @rhaddu_v2i16(<vscale x 2 x i16> %s0, <vscale x 2 x i16> %s1) {
+; CHECK-LABEL: rhaddu_v2i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z2.d, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: and z0.d, z0.d, #0xffff
+; CHECK-NEXT: and z1.d, z1.d, #0xffff
+; CHECK-NEXT: eor z0.d, z0.d, z2.d
+; CHECK-NEXT: sub z0.d, z1.d, z0.d
+; CHECK-NEXT: lsr z0.d, z0.d, #1
+; CHECK-NEXT: ret
+entry:
+ %s0s = zext <vscale x 2 x i16> %s0 to <vscale x 2 x i32>
+ %s1s = zext <vscale x 2 x i16> %s1 to <vscale x 2 x i32>
+ %add = add nuw nsw <vscale x 2 x i32> %s0s, shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 1, i32 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer)
+ %add2 = add nuw nsw <vscale x 2 x i32> %add, %s1s
+ %s = lshr <vscale x 2 x i32> %add2, shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 1, i32 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer)
+ %result = trunc <vscale x 2 x i32> %s to <vscale x 2 x i16>
+ ret <vscale x 2 x i16> %result
+}
+
+define <vscale x 4 x i16> @rhadds_v4i16(<vscale x 4 x i16> %s0, <vscale x 4 x i16> %s1) {
+; CHECK-LABEL: rhadds_v4i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: mov z2.s, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: sxth z0.s, p0/m, z0.s
+; CHECK-NEXT: sxth z1.s, p0/m, z1.s
+; CHECK-NEXT: eor z0.d, z0.d, z2.d
+; CHECK-NEXT: sub z0.s, z1.s, z0.s
+; CHECK-NEXT: asr z0.s, z0.s, #1
+; CHECK-NEXT: ret
+entry:
+ %s0s = sext <vscale x 4 x i16> %s0 to <vscale x 4 x i32>
+ %s1s = sext <vscale x 4 x i16> %s1 to <vscale x 4 x i32>
+ %add = add <vscale x 4 x i32> %s0s, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %add2 = add <vscale x 4 x i32> %add, %s1s
+ %s = ashr <vscale x 4 x i32> %add2, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %result = trunc <vscale x 4 x i32> %s to <vscale x 4 x i16>
+ ret <vscale x 4 x i16> %result
+}
+
+define <vscale x 4 x i16> @rhadds_v4i16_lsh(<vscale x 4 x i16> %s0, <vscale x 4 x i16> %s1) {
+; CHECK-LABEL: rhadds_v4i16_lsh:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: mov z2.s, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: sxth z0.s, p0/m, z0.s
+; CHECK-NEXT: sxth z1.s, p0/m, z1.s
+; CHECK-NEXT: eor z0.d, z0.d, z2.d
+; CHECK-NEXT: sub z0.s, z1.s, z0.s
+; CHECK-NEXT: lsr z0.s, z0.s, #1
+; CHECK-NEXT: ret
+entry:
+ %s0s = sext <vscale x 4 x i16> %s0 to <vscale x 4 x i32>
+ %s1s = sext <vscale x 4 x i16> %s1 to <vscale x 4 x i32>
+ %add = add <vscale x 4 x i32> %s0s, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %add2 = add <vscale x 4 x i32> %add, %s1s
+ %s = lshr <vscale x 4 x i32> %add2, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %result = trunc <vscale x 4 x i32> %s to <vscale x 4 x i16>
+ ret <vscale x 4 x i16> %result
+}
+
+define <vscale x 4 x i16> @rhaddu_v4i16(<vscale x 4 x i16> %s0, <vscale x 4 x i16> %s1) {
+; SVE-LABEL: rhaddu_v4i16:
+; SVE: // %bb.0: // %entry
+; SVE-NEXT: mov z2.s, #-1 // =0xffffffffffffffff
+; SVE-NEXT: and z0.s, z0.s, #0xffff
+; SVE-NEXT: and z1.s, z1.s, #0xffff
+; SVE-NEXT: eor z0.d, z0.d, z2.d
+; SVE-NEXT: sub z0.s, z1.s, z0.s
+; SVE-NEXT: lsr z0.s, z0.s, #1
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: rhaddu_v4i16:
+; SVE2: // %bb.0: // %entry
+; SVE2-NEXT: ptrue p0.s
+; SVE2-NEXT: and z0.s, z0.s, #0xffff
+; SVE2-NEXT: and z1.s, z1.s, #0xffff
+; SVE2-NEXT: urhadd z0.s, p0/m, z0.s, z1.s
+; SVE2-NEXT: ret
+entry:
+ %s0s = zext <vscale x 4 x i16> %s0 to <vscale x 4 x i32>
+ %s1s = zext <vscale x 4 x i16> %s1 to <vscale x 4 x i32>
+ %add = add nuw nsw <vscale x 4 x i32> %s0s, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %add2 = add nuw nsw <vscale x 4 x i32> %add, %s1s
+ %s = lshr <vscale x 4 x i32> %add2, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %result = trunc <vscale x 4 x i32> %s to <vscale x 4 x i16>
+ ret <vscale x 4 x i16> %result
+}
+
+define <vscale x 8 x i16> @rhadds_v8i16(<vscale x 8 x i16> %s0, <vscale x 8 x i16> %s1) {
+; SVE-LABEL: rhadds_v8i16:
+; SVE: // %bb.0: // %entry
+; SVE-NEXT: asr z2.h, z1.h, #1
+; SVE-NEXT: asr z3.h, z0.h, #1
+; SVE-NEXT: orr z0.d, z0.d, z1.d
+; SVE-NEXT: add z1.h, z3.h, z2.h
+; SVE-NEXT: and z0.h, z0.h, #0x1
+; SVE-NEXT: add z0.h, z1.h, z0.h
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: rhadds_v8i16:
+; SVE2: // %bb.0: // %entry
+; SVE2-NEXT: ptrue p0.h
+; SVE2-NEXT: srhadd z0.h, p0/m, z0.h, z1.h
+; SVE2-NEXT: ret
+entry:
+ %s0s = sext <vscale x 8 x i16> %s0 to <vscale x 8 x i32>
+ %s1s = sext <vscale x 8 x i16> %s1 to <vscale x 8 x i32>
+ %add = add <vscale x 8 x i32> %s0s, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 1, i32 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
+ %add2 = add <vscale x 8 x i32> %add, %s1s
+ %s = ashr <vscale x 8 x i32> %add2, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 1, i32 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
+ %result = trunc <vscale x 8 x i32> %s to <vscale x 8 x i16>
+ ret <vscale x 8 x i16> %result
+}
+
+define <vscale x 8 x i16> @rhadds_v8i16_lsh(<vscale x 8 x i16> %s0, <vscale x 8 x i16> %s1) {
+; SVE-LABEL: rhadds_v8i16_lsh:
+; SVE: // %bb.0: // %entry
+; SVE-NEXT: asr z2.h, z1.h, #1
+; SVE-NEXT: asr z3.h, z0.h, #1
+; SVE-NEXT: orr z0.d, z0.d, z1.d
+; SVE-NEXT: add z1.h, z3.h, z2.h
+; SVE-NEXT: and z0.h, z0.h, #0x1
+; SVE-NEXT: add z0.h, z1.h, z0.h
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: rhadds_v8i16_lsh:
+; SVE2: // %bb.0: // %entry
+; SVE2-NEXT: ptrue p0.h
+; SVE2-NEXT: srhadd z0.h, p0/m, z0.h, z1.h
+; SVE2-NEXT: ret
+entry:
+ %s0s = sext <vscale x 8 x i16> %s0 to <vscale x 8 x i32>
+ %s1s = sext <vscale x 8 x i16> %s1 to <vscale x 8 x i32>
+ %add = add <vscale x 8 x i32> %s0s, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 1, i32 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
+ %add2 = add <vscale x 8 x i32> %add, %s1s
+ %s = lshr <vscale x 8 x i32> %add2, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 1, i32 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
+ %result = trunc <vscale x 8 x i32> %s to <vscale x 8 x i16>
+ ret <vscale x 8 x i16> %result
+}
+
+define <vscale x 8 x i16> @rhaddu_v8i16(<vscale x 8 x i16> %s0, <vscale x 8 x i16> %s1) {
+; SVE-LABEL: rhaddu_v8i16:
+; SVE: // %bb.0: // %entry
+; SVE-NEXT: lsr z2.h, z1.h, #1
+; SVE-NEXT: lsr z3.h, z0.h, #1
+; SVE-NEXT: orr z0.d, z0.d, z1.d
+; SVE-NEXT: add z1.h, z3.h, z2.h
+; SVE-NEXT: and z0.h, z0.h, #0x1
+; SVE-NEXT: add z0.h, z1.h, z0.h
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: rhaddu_v8i16:
+; SVE2: // %bb.0: // %entry
+; SVE2-NEXT: ptrue p0.h
+; SVE2-NEXT: urhadd z0.h, p0/m, z0.h, z1.h
+; SVE2-NEXT: ret
+entry:
+ %s0s = zext <vscale x 8 x i16> %s0 to <vscale x 8 x i32>
+ %s1s = zext <vscale x 8 x i16> %s1 to <vscale x 8 x i32>
+ %add = add nuw nsw <vscale x 8 x i32> %s0s, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 1, i32 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
+ %add2 = add nuw nsw <vscale x 8 x i32> %add, %s1s
+ %s = lshr <vscale x 8 x i32> %add2, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 1, i32 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
+ %result = trunc <vscale x 8 x i32> %s to <vscale x 8 x i16>
+ ret <vscale x 8 x i16> %result
+}
+
+define <vscale x 4 x i8> @rhadds_v4i8(<vscale x 4 x i8> %s0, <vscale x 4 x i8> %s1) {
+; CHECK-LABEL: rhadds_v4i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: mov z2.s, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: sxtb z0.s, p0/m, z0.s
+; CHECK-NEXT: sxtb z1.s, p0/m, z1.s
+; CHECK-NEXT: eor z0.d, z0.d, z2.d
+; CHECK-NEXT: sub z0.s, z1.s, z0.s
+; CHECK-NEXT: asr z0.s, z0.s, #1
+; CHECK-NEXT: ret
+entry:
+ %s0s = sext <vscale x 4 x i8> %s0 to <vscale x 4 x i16>
+ %s1s = sext <vscale x 4 x i8> %s1 to <vscale x 4 x i16>
+ %add = add <vscale x 4 x i16> %s0s, shufflevector (<vscale x 4 x i16> insertelement (<vscale x 4 x i16> poison, i16 1, i32 0), <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer)
+ %add2 = add <vscale x 4 x i16> %add, %s1s
+ %s = ashr <vscale x 4 x i16> %add2, shufflevector (<vscale x 4 x i16> insertelement (<vscale x 4 x i16> poison, i16 1, i32 0), <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer)
+ %result = trunc <vscale x 4 x i16> %s to <vscale x 4 x i8>
+ ret <vscale x 4 x i8> %result
+}
+
+define <vscale x 4 x i8> @rhadds_v4i8_lsh(<vscale x 4 x i8> %s0, <vscale x 4 x i8> %s1) {
+; CHECK-LABEL: rhadds_v4i8_lsh:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: mov z2.s, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: sxtb z0.s, p0/m, z0.s
+; CHECK-NEXT: sxtb z1.s, p0/m, z1.s
+; CHECK-NEXT: eor z0.d, z0.d, z2.d
+; CHECK-NEXT: sub z0.s, z1.s, z0.s
+; CHECK-NEXT: and z0.s, z0.s, #0xffff
+; CHECK-NEXT: lsr z0.s, z0.s, #1
+; CHECK-NEXT: ret
+entry:
+ %s0s = sext <vscale x 4 x i8> %s0 to <vscale x 4 x i16>
+ %s1s = sext <vscale x 4 x i8> %s1 to <vscale x 4 x i16>
+ %add = add <vscale x 4 x i16> %s0s, shufflevector (<vscale x 4 x i16> insertelement (<vscale x 4 x i16> poison, i16 1, i32 0), <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer)
+ %add2 = add <vscale x 4 x i16> %add, %s1s
+ %s = lshr <vscale x 4 x i16> %add2, shufflevector (<vscale x 4 x i16> insertelement (<vscale x 4 x i16> poison, i16 1, i32 0), <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer)
+ %result = trunc <vscale x 4 x i16> %s to <vscale x 4 x i8>
+ ret <vscale x 4 x i8> %result
+}
+
+define <vscale x 4 x i8> @rhaddu_v4i8(<vscale x 4 x i8> %s0, <vscale x 4 x i8> %s1) {
+; CHECK-LABEL: rhaddu_v4i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z2.s, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: and z0.s, z0.s, #0xff
+; CHECK-NEXT: and z1.s, z1.s, #0xff
+; CHECK-NEXT: eor z0.d, z0.d, z2.d
+; CHECK-NEXT: sub z0.s, z1.s, z0.s
+; CHECK-NEXT: lsr z0.s, z0.s, #1
+; CHECK-NEXT: ret
+entry:
+ %s0s = zext <vscale x 4 x i8> %s0 to <vscale x 4 x i16>
+ %s1s = zext <vscale x 4 x i8> %s1 to <vscale x 4 x i16>
+ %add = add nuw nsw <vscale x 4 x i16> %s0s, shufflevector (<vscale x 4 x i16> insertelement (<vscale x 4 x i16> poison, i16 1, i32 0), <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer)
+ %add2 = add nuw nsw <vscale x 4 x i16> %add, %s1s
+ %s = lshr <vscale x 4 x i16> %add2, shufflevector (<vscale x 4 x i16> insertelement (<vscale x 4 x i16> poison, i16 1, i32 0), <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer)
+ %result = trunc <vscale x 4 x i16> %s to <vscale x 4 x i8>
+ ret <vscale x 4 x i8> %result
+}
+
+define <vscale x 8 x i8> @rhadds_v8i8(<vscale x 8 x i8> %s0, <vscale x 8 x i8> %s1) {
+; CHECK-LABEL: rhadds_v8i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: mov z2.h, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: sxtb z0.h, p0/m, z0.h
+; CHECK-NEXT: sxtb z1.h, p0/m, z1.h
+; CHECK-NEXT: eor z0.d, z0.d, z2.d
+; CHECK-NEXT: sub z0.h, z1.h, z0.h
+; CHECK-NEXT: asr z0.h, z0.h, #1
+; CHECK-NEXT: ret
+entry:
+ %s0s = sext <vscale x 8 x i8> %s0 to <vscale x 8 x i16>
+ %s1s = sext <vscale x 8 x i8> %s1 to <vscale x 8 x i16>
+ %add = add <vscale x 8 x i16> %s0s, shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 1, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)
+ %add2 = add <vscale x 8 x i16> %add, %s1s
+ %s = ashr <vscale x 8 x i16> %add2, shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 1, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)
+ %result = trunc <vscale x 8 x i16> %s to <vscale x 8 x i8>
+ ret <vscale x 8 x i8> %result
+}
+
+define <vscale x 8 x i8> @rhadds_v8i8_lsh(<vscale x 8 x i8> %s0, <vscale x 8 x i8> %s1) {
+; CHECK-LABEL: rhadds_v8i8_lsh:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: mov z2.h, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: sxtb z0.h, p0/m, z0.h
+; CHECK-NEXT: sxtb z1.h, p0/m, z1.h
+; CHECK-NEXT: eor z0.d, z0.d, z2.d
+; CHECK-NEXT: sub z0.h, z1.h, z0.h
+; CHECK-NEXT: lsr z0.h, z0.h, #1
+; CHECK-NEXT: ret
+entry:
+ %s0s = sext <vscale x 8 x i8> %s0 to <vscale x 8 x i16>
+ %s1s = sext <vscale x 8 x i8> %s1 to <vscale x 8 x i16>
+ %add = add <vscale x 8 x i16> %s0s, shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 1, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)
+ %add2 = add <vscale x 8 x i16> %add, %s1s
+ %s = lshr <vscale x 8 x i16> %add2, shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 1, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)
+ %result = trunc <vscale x 8 x i16> %s to <vscale x 8 x i8>
+ ret <vscale x 8 x i8> %result
+}
+
+define <vscale x 8 x i8> @rhaddu_v8i8(<vscale x 8 x i8> %s0, <vscale x 8 x i8> %s1) {
+; SVE-LABEL: rhaddu_v8i8:
+; SVE: // %bb.0: // %entry
+; SVE-NEXT: mov z2.h, #-1 // =0xffffffffffffffff
+; SVE-NEXT: and z0.h, z0.h, #0xff
+; SVE-NEXT: and z1.h, z1.h, #0xff
+; SVE-NEXT: eor z0.d, z0.d, z2.d
+; SVE-NEXT: sub z0.h, z1.h, z0.h
+; SVE-NEXT: lsr z0.h, z0.h, #1
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: rhaddu_v8i8:
+; SVE2: // %bb.0: // %entry
+; SVE2-NEXT: ptrue p0.h
+; SVE2-NEXT: and z0.h, z0.h, #0xff
+; SVE2-NEXT: and z1.h, z1.h, #0xff
+; SVE2-NEXT: urhadd z0.h, p0/m, z0.h, z1.h
+; SVE2-NEXT: ret
+entry:
+ %s0s = zext <vscale x 8 x i8> %s0 to <vscale x 8 x i16>
+ %s1s = zext <vscale x 8 x i8> %s1 to <vscale x 8 x i16>
+ %add = add nuw nsw <vscale x 8 x i16> %s0s, shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 1, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)
+ %add2 = add nuw nsw <vscale x 8 x i16> %add, %s1s
+ %s = lshr <vscale x 8 x i16> %add2, shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 1, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)
+ %result = trunc <vscale x 8 x i16> %s to <vscale x 8 x i8>
+ ret <vscale x 8 x i8> %result
+}
+
+define <vscale x 16 x i8> @rhadds_v16i8(<vscale x 16 x i8> %s0, <vscale x 16 x i8> %s1) {
+; SVE-LABEL: rhadds_v16i8:
+; SVE: // %bb.0: // %entry
+; SVE-NEXT: asr z2.b, z1.b, #1
+; SVE-NEXT: asr z3.b, z0.b, #1
+; SVE-NEXT: orr z0.d, z0.d, z1.d
+; SVE-NEXT: add z1.b, z3.b, z2.b
+; SVE-NEXT: and z0.b, z0.b, #0x1
+; SVE-NEXT: add z0.b, z1.b, z0.b
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: rhadds_v16i8:
+; SVE2: // %bb.0: // %entry
+; SVE2-NEXT: ptrue p0.b
+; SVE2-NEXT: srhadd z0.b, p0/m, z0.b, z1.b
+; SVE2-NEXT: ret
+entry:
+ %s0s = sext <vscale x 16 x i8> %s0 to <vscale x 16 x i16>
+ %s1s = sext <vscale x 16 x i8> %s1 to <vscale x 16 x i16>
+ %add = add <vscale x 16 x i16> %s0s, shufflevector (<vscale x 16 x i16> insertelement (<vscale x 16 x i16> poison, i16 1, i32 0), <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer)
+ %add2 = add <vscale x 16 x i16> %add, %s1s
+ %s = ashr <vscale x 16 x i16> %add2, shufflevector (<vscale x 16 x i16> insertelement (<vscale x 16 x i16> poison, i16 1, i32 0), <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer)
+ %result = trunc <vscale x 16 x i16> %s to <vscale x 16 x i8>
+ ret <vscale x 16 x i8> %result
+}
+
+define <vscale x 16 x i8> @rhadds_v16i8_lsh(<vscale x 16 x i8> %s0, <vscale x 16 x i8> %s1) {
+; SVE-LABEL: rhadds_v16i8_lsh:
+; SVE: // %bb.0: // %entry
+; SVE-NEXT: asr z2.b, z1.b, #1
+; SVE-NEXT: asr z3.b, z0.b, #1
+; SVE-NEXT: orr z0.d, z0.d, z1.d
+; SVE-NEXT: add z1.b, z3.b, z2.b
+; SVE-NEXT: and z0.b, z0.b, #0x1
+; SVE-NEXT: add z0.b, z1.b, z0.b
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: rhadds_v16i8_lsh:
+; SVE2: // %bb.0: // %entry
+; SVE2-NEXT: ptrue p0.b
+; SVE2-NEXT: srhadd z0.b, p0/m, z0.b, z1.b
+; SVE2-NEXT: ret
+entry:
+ %s0s = sext <vscale x 16 x i8> %s0 to <vscale x 16 x i16>
+ %s1s = sext <vscale x 16 x i8> %s1 to <vscale x 16 x i16>
+ %add = add <vscale x 16 x i16> %s0s, shufflevector (<vscale x 16 x i16> insertelement (<vscale x 16 x i16> poison, i16 1, i32 0), <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer)
+ %add2 = add <vscale x 16 x i16> %add, %s1s
+ %s = lshr <vscale x 16 x i16> %add2, shufflevector (<vscale x 16 x i16> insertelement (<vscale x 16 x i16> poison, i16 1, i32 0), <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer)
+ %result = trunc <vscale x 16 x i16> %s to <vscale x 16 x i8>
+ ret <vscale x 16 x i8> %result
+}
+
+define <vscale x 16 x i8> @rhaddu_v16i8(<vscale x 16 x i8> %s0, <vscale x 16 x i8> %s1) {
+; SVE-LABEL: rhaddu_v16i8:
+; SVE: // %bb.0: // %entry
+; SVE-NEXT: lsr z2.b, z1.b, #1
+; SVE-NEXT: lsr z3.b, z0.b, #1
+; SVE-NEXT: orr z0.d, z0.d, z1.d
+; SVE-NEXT: add z1.b, z3.b, z2.b
+; SVE-NEXT: and z0.b, z0.b, #0x1
+; SVE-NEXT: add z0.b, z1.b, z0.b
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: rhaddu_v16i8:
+; SVE2: // %bb.0: // %entry
+; SVE2-NEXT: ptrue p0.b
+; SVE2-NEXT: urhadd z0.b, p0/m, z0.b, z1.b
+; SVE2-NEXT: ret
+entry:
+ %s0s = zext <vscale x 16 x i8> %s0 to <vscale x 16 x i16>
+ %s1s = zext <vscale x 16 x i8> %s1 to <vscale x 16 x i16>
+ %add = add nuw nsw <vscale x 16 x i16> %s0s, shufflevector (<vscale x 16 x i16> insertelement (<vscale x 16 x i16> poison, i16 1, i32 0), <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer)
+ %add2 = add nuw nsw <vscale x 16 x i16> %add, %s1s
+ %s = lshr <vscale x 16 x i16> %add2, shufflevector (<vscale x 16 x i16> insertelement (<vscale x 16 x i16> poison, i16 1, i32 0), <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer)
+ %result = trunc <vscale x 16 x i16> %s to <vscale x 16 x i8>
+ ret <vscale x 16 x i8> %result
+}
diff --git a/llvm/test/CodeGen/AArch64/sve2-hadd.ll b/llvm/test/CodeGen/AArch64/sve2-hadd.ll
deleted file mode 100644
index 9363412ff06ba..0000000000000
--- a/llvm/test/CodeGen/AArch64/sve2-hadd.ll
+++ /dev/null
@@ -1,627 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple aarch64-none-eabi -mattr=+sve2 -o - | FileCheck %s
-
-define <vscale x 2 x i64> @hadds_v2i64(<vscale x 2 x i64> %s0, <vscale x 2 x i64> %s1) {
-; CHECK-LABEL: hadds_v2i64:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: shadd z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
-entry:
- %s0s = sext <vscale x 2 x i64> %s0 to <vscale x 2 x i128>
- %s1s = sext <vscale x 2 x i64> %s1 to <vscale x 2 x i128>
- %m = add nsw <vscale x 2 x i128> %s0s, %s1s
- %s = lshr <vscale x 2 x i128> %m, shufflevector (<vscale x 2 x i128> insertelement (<vscale x 2 x i128> poison, i128 1, i32 0), <vscale x 2 x i128> poison, <vscale x 2 x i32> zeroinitializer)
- %s2 = trunc <vscale x 2 x i128> %s to <vscale x 2 x i64>
- ret <vscale x 2 x i64> %s2
-}
-
-define <vscale x 2 x i64> @haddu_v2i64(<vscale x 2 x i64> %s0, <vscale x 2 x i64> %s1) {
-; CHECK-LABEL: haddu_v2i64:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: uhadd z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
-entry:
- %s0s = zext <vscale x 2 x i64> %s0 to <vscale x 2 x i128>
- %s1s = zext <vscale x 2 x i64> %s1 to <vscale x 2 x i128>
- %m = add nuw nsw <vscale x 2 x i128> %s0s, %s1s
- %s = lshr <vscale x 2 x i128> %m, shufflevector (<vscale x 2 x i128> insertelement (<vscale x 2 x i128> poison, i128 1, i32 0), <vscale x 2 x i128> poison, <vscale x 2 x i32> zeroinitializer)
- %s2 = trunc <vscale x 2 x i128> %s to <vscale x 2 x i64>
- ret <vscale x 2 x i64> %s2
-}
-
-define <vscale x 2 x i32> @hadds_v2i32(<vscale x 2 x i32> %s0, <vscale x 2 x i32> %s1) {
-; CHECK-LABEL: hadds_v2i32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: sxtw z0.d, p0/m, z0.d
-; CHECK-NEXT: adr z0.d, [z0.d, z1.d, sxtw]
-; CHECK-NEXT: lsr z0.d, z0.d, #1
-; CHECK-NEXT: ret
-entry:
- %s0s = sext <vscale x 2 x i32> %s0 to <vscale x 2 x i64>
- %s1s = sext <vscale x 2 x i32> %s1 to <vscale x 2 x i64>
- %m = add nsw <vscale x 2 x i64> %s0s, %s1s
- %s = lshr <vscale x 2 x i64> %m, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
- %s2 = trunc <vscale x 2 x i64> %s to <vscale x 2 x i32>
- ret <vscale x 2 x i32> %s2
-}
-
-define <vscale x 2 x i32> @haddu_v2i32(<vscale x 2 x i32> %s0, <vscale x 2 x i32> %s1) {
-; CHECK-LABEL: haddu_v2i32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: and z0.d, z0.d, #0xffffffff
-; CHECK-NEXT: and z1.d, z1.d, #0xffffffff
-; CHECK-NEXT: uhadd z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
-entry:
- %s0s = zext <vscale x 2 x i32> %s0 to <vscale x 2 x i64>
- %s1s = zext <vscale x 2 x i32> %s1 to <vscale x 2 x i64>
- %m = add nuw nsw <vscale x 2 x i64> %s0s, %s1s
- %s = lshr <vscale x 2 x i64> %m, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
- %s2 = trunc <vscale x 2 x i64> %s to <vscale x 2 x i32>
- ret <vscale x 2 x i32> %s2
-}
-
-define <vscale x 4 x i32> @hadds_v4i32(<vscale x 4 x i32> %s0, <vscale x 4 x i32> %s1) {
-; CHECK-LABEL: hadds_v4i32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: shadd z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
-entry:
- %s0s = sext <vscale x 4 x i32> %s0 to <vscale x 4 x i64>
- %s1s = sext <vscale x 4 x i32> %s1 to <vscale x 4 x i64>
- %m = add nsw <vscale x 4 x i64> %s0s, %s1s
- %s = lshr <vscale x 4 x i64> %m, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
- %s2 = trunc <vscale x 4 x i64> %s to <vscale x 4 x i32>
- ret <vscale x 4 x i32> %s2
-}
-
-define <vscale x 4 x i32> @haddu_v4i32(<vscale x 4 x i32> %s0, <vscale x 4 x i32> %s1) {
-; CHECK-LABEL: haddu_v4i32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: uhadd z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
-entry:
- %s0s = zext <vscale x 4 x i32> %s0 to <vscale x 4 x i64>
- %s1s = zext <vscale x 4 x i32> %s1 to <vscale x 4 x i64>
- %m = add nuw nsw <vscale x 4 x i64> %s0s, %s1s
- %s = lshr <vscale x 4 x i64> %m, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
- %s2 = trunc <vscale x 4 x i64> %s to <vscale x 4 x i32>
- ret <vscale x 4 x i32> %s2
-}
-
-define <vscale x 2 x i16> @hadds_v2i16(<vscale x 2 x i16> %s0, <vscale x 2 x i16> %s1) {
-; CHECK-LABEL: hadds_v2i16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: sxth z0.d, p0/m, z0.d
-; CHECK-NEXT: sxth z1.d, p0/m, z1.d
-; CHECK-NEXT: add z0.d, z0.d, z1.d
-; CHECK-NEXT: and z0.d, z0.d, #0xffffffff
-; CHECK-NEXT: lsr z0.d, z0.d, #1
-; CHECK-NEXT: ret
-entry:
- %s0s = sext <vscale x 2 x i16> %s0 to <vscale x 2 x i32>
- %s1s = sext <vscale x 2 x i16> %s1 to <vscale x 2 x i32>
- %m = add nsw <vscale x 2 x i32> %s0s, %s1s
- %s = lshr <vscale x 2 x i32> %m, shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 1, i32 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer)
- %s2 = trunc <vscale x 2 x i32> %s to <vscale x 2 x i16>
- ret <vscale x 2 x i16> %s2
-}
-
-define <vscale x 2 x i16> @haddu_v2i16(<vscale x 2 x i16> %s0, <vscale x 2 x i16> %s1) {
-; CHECK-LABEL: haddu_v2i16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: and z0.d, z0.d, #0xffff
-; CHECK-NEXT: and z1.d, z1.d, #0xffff
-; CHECK-NEXT: uhadd z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
-entry:
- %s0s = zext <vscale x 2 x i16> %s0 to <vscale x 2 x i32>
- %s1s = zext <vscale x 2 x i16> %s1 to <vscale x 2 x i32>
- %m = add nuw nsw <vscale x 2 x i32> %s0s, %s1s
- %s = lshr <vscale x 2 x i32> %m, shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 1, i32 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer)
- %s2 = trunc <vscale x 2 x i32> %s to <vscale x 2 x i16>
- ret <vscale x 2 x i16> %s2
-}
-
-define <vscale x 4 x i16> @hadds_v4i16(<vscale x 4 x i16> %s0, <vscale x 4 x i16> %s1) {
-; CHECK-LABEL: hadds_v4i16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: sxth z0.s, p0/m, z0.s
-; CHECK-NEXT: sxth z1.s, p0/m, z1.s
-; CHECK-NEXT: add z0.s, z0.s, z1.s
-; CHECK-NEXT: lsr z0.s, z0.s, #1
-; CHECK-NEXT: ret
-entry:
- %s0s = sext <vscale x 4 x i16> %s0 to <vscale x 4 x i32>
- %s1s = sext <vscale x 4 x i16> %s1 to <vscale x 4 x i32>
- %m = add nsw <vscale x 4 x i32> %s0s, %s1s
- %s = lshr <vscale x 4 x i32> %m, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
- %s2 = trunc <vscale x 4 x i32> %s to <vscale x 4 x i16>
- ret <vscale x 4 x i16> %s2
-}
-
-define <vscale x 4 x i16> @haddu_v4i16(<vscale x 4 x i16> %s0, <vscale x 4 x i16> %s1) {
-; CHECK-LABEL: haddu_v4i16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: and z0.s, z0.s, #0xffff
-; CHECK-NEXT: and z1.s, z1.s, #0xffff
-; CHECK-NEXT: uhadd z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
-entry:
- %s0s = zext <vscale x 4 x i16> %s0 to <vscale x 4 x i32>
- %s1s = zext <vscale x 4 x i16> %s1 to <vscale x 4 x i32>
- %m = add nuw nsw <vscale x 4 x i32> %s0s, %s1s
- %s = lshr <vscale x 4 x i32> %m, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
- %s2 = trunc <vscale x 4 x i32> %s to <vscale x 4 x i16>
- ret <vscale x 4 x i16> %s2
-}
-
-define <vscale x 8 x i16> @hadds_v8i16(<vscale x 8 x i16> %s0, <vscale x 8 x i16> %s1) {
-; CHECK-LABEL: hadds_v8i16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: shadd z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
-entry:
- %s0s = sext <vscale x 8 x i16> %s0 to <vscale x 8 x i32>
- %s1s = sext <vscale x 8 x i16> %s1 to <vscale x 8 x i32>
- %m = add nsw <vscale x 8 x i32> %s0s, %s1s
- %s = lshr <vscale x 8 x i32> %m, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 1, i32 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
- %s2 = trunc <vscale x 8 x i32> %s to <vscale x 8 x i16>
- ret <vscale x 8 x i16> %s2
-}
-
-define <vscale x 8 x i16> @haddu_v8i16(<vscale x 8 x i16> %s0, <vscale x 8 x i16> %s1) {
-; CHECK-LABEL: haddu_v8i16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: uhadd z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
-entry:
- %s0s = zext <vscale x 8 x i16> %s0 to <vscale x 8 x i32>
- %s1s = zext <vscale x 8 x i16> %s1 to <vscale x 8 x i32>
- %m = add nuw nsw <vscale x 8 x i32> %s0s, %s1s
- %s = lshr <vscale x 8 x i32> %m, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 1, i32 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
- %s2 = trunc <vscale x 8 x i32> %s to <vscale x 8 x i16>
- ret <vscale x 8 x i16> %s2
-}
-
-define <vscale x 4 x i8> @hadds_v4i8(<vscale x 4 x i8> %s0, <vscale x 4 x i8> %s1) {
-; CHECK-LABEL: hadds_v4i8:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: sxtb z0.s, p0/m, z0.s
-; CHECK-NEXT: sxtb z1.s, p0/m, z1.s
-; CHECK-NEXT: add z0.s, z0.s, z1.s
-; CHECK-NEXT: and z0.s, z0.s, #0xffff
-; CHECK-NEXT: lsr z0.s, z0.s, #1
-; CHECK-NEXT: ret
-entry:
- %s0s = sext <vscale x 4 x i8> %s0 to <vscale x 4 x i16>
- %s1s = sext <vscale x 4 x i8> %s1 to <vscale x 4 x i16>
- %m = add nsw <vscale x 4 x i16> %s0s, %s1s
- %s = lshr <vscale x 4 x i16> %m, shufflevector (<vscale x 4 x i16> insertelement (<vscale x 4 x i16> poison, i16 1, i32 0), <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer)
- %s2 = trunc <vscale x 4 x i16> %s to <vscale x 4 x i8>
- ret <vscale x 4 x i8> %s2
-}
-
-define <vscale x 4 x i8> @haddu_v4i8(<vscale x 4 x i8> %s0, <vscale x 4 x i8> %s1) {
-; CHECK-LABEL: haddu_v4i8:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: and z0.s, z0.s, #0xff
-; CHECK-NEXT: and z1.s, z1.s, #0xff
-; CHECK-NEXT: uhadd z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
-entry:
- %s0s = zext <vscale x 4 x i8> %s0 to <vscale x 4 x i16>
- %s1s = zext <vscale x 4 x i8> %s1 to <vscale x 4 x i16>
- %m = add nuw nsw <vscale x 4 x i16> %s0s, %s1s
- %s = lshr <vscale x 4 x i16> %m, shufflevector (<vscale x 4 x i16> insertelement (<vscale x 4 x i16> poison, i16 1, i32 0), <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer)
- %s2 = trunc <vscale x 4 x i16> %s to <vscale x 4 x i8>
- ret <vscale x 4 x i8> %s2
-}
-
-define <vscale x 8 x i8> @hadds_v8i8(<vscale x 8 x i8> %s0, <vscale x 8 x i8> %s1) {
-; CHECK-LABEL: hadds_v8i8:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: sxtb z0.h, p0/m, z0.h
-; CHECK-NEXT: sxtb z1.h, p0/m, z1.h
-; CHECK-NEXT: add z0.h, z0.h, z1.h
-; CHECK-NEXT: lsr z0.h, z0.h, #1
-; CHECK-NEXT: ret
-entry:
- %s0s = sext <vscale x 8 x i8> %s0 to <vscale x 8 x i16>
- %s1s = sext <vscale x 8 x i8> %s1 to <vscale x 8 x i16>
- %m = add nsw <vscale x 8 x i16> %s0s, %s1s
- %s = lshr <vscale x 8 x i16> %m, shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 1, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)
- %s2 = trunc <vscale x 8 x i16> %s to <vscale x 8 x i8>
- ret <vscale x 8 x i8> %s2
-}
-
-define <vscale x 8 x i8> @haddu_v8i8(<vscale x 8 x i8> %s0, <vscale x 8 x i8> %s1) {
-; CHECK-LABEL: haddu_v8i8:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: and z0.h, z0.h, #0xff
-; CHECK-NEXT: and z1.h, z1.h, #0xff
-; CHECK-NEXT: uhadd z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
-entry:
- %s0s = zext <vscale x 8 x i8> %s0 to <vscale x 8 x i16>
- %s1s = zext <vscale x 8 x i8> %s1 to <vscale x 8 x i16>
- %m = add nuw nsw <vscale x 8 x i16> %s0s, %s1s
- %s = lshr <vscale x 8 x i16> %m, shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 1, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)
- %s2 = trunc <vscale x 8 x i16> %s to <vscale x 8 x i8>
- ret <vscale x 8 x i8> %s2
-}
-
-define <vscale x 16 x i8> @hadds_v16i8(<vscale x 16 x i8> %s0, <vscale x 16 x i8> %s1) {
-; CHECK-LABEL: hadds_v16i8:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: shadd z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT: ret
-entry:
- %s0s = sext <vscale x 16 x i8> %s0 to <vscale x 16 x i16>
- %s1s = sext <vscale x 16 x i8> %s1 to <vscale x 16 x i16>
- %m = add nsw <vscale x 16 x i16> %s0s, %s1s
- %s = lshr <vscale x 16 x i16> %m, shufflevector (<vscale x 16 x i16> insertelement (<vscale x 16 x i16> poison, i16 1, i32 0), <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer)
- %s2 = trunc <vscale x 16 x i16> %s to <vscale x 16 x i8>
- ret <vscale x 16 x i8> %s2
-}
-
-define <vscale x 16 x i8> @haddu_v16i8(<vscale x 16 x i8> %s0, <vscale x 16 x i8> %s1) {
-; CHECK-LABEL: haddu_v16i8:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: uhadd z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT: ret
-entry:
- %s0s = zext <vscale x 16 x i8> %s0 to <vscale x 16 x i16>
- %s1s = zext <vscale x 16 x i8> %s1 to <vscale x 16 x i16>
- %m = add nuw nsw <vscale x 16 x i16> %s0s, %s1s
- %s = lshr <vscale x 16 x i16> %m, shufflevector (<vscale x 16 x i16> insertelement (<vscale x 16 x i16> poison, i16 1, i32 0), <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer)
- %s2 = trunc <vscale x 16 x i16> %s to <vscale x 16 x i8>
- ret <vscale x 16 x i8> %s2
-}
-
-define <vscale x 2 x i64> @rhadds_v2i64(<vscale x 2 x i64> %s0, <vscale x 2 x i64> %s1) {
-; CHECK-LABEL: rhadds_v2i64:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: srhadd z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
-entry:
- %s0s = sext <vscale x 2 x i64> %s0 to <vscale x 2 x i128>
- %s1s = sext <vscale x 2 x i64> %s1 to <vscale x 2 x i128>
- %add = add <vscale x 2 x i128> %s0s, shufflevector (<vscale x 2 x i128> insertelement (<vscale x 2 x i128> poison, i128 1, i32 0), <vscale x 2 x i128> poison, <vscale x 2 x i32> zeroinitializer)
- %add2 = add <vscale x 2 x i128> %add, %s1s
- %s = lshr <vscale x 2 x i128> %add2, shufflevector (<vscale x 2 x i128> insertelement (<vscale x 2 x i128> poison, i128 1, i32 0), <vscale x 2 x i128> poison, <vscale x 2 x i32> zeroinitializer)
- %result = trunc <vscale x 2 x i128> %s to <vscale x 2 x i64>
- ret <vscale x 2 x i64> %result
-}
-
-define <vscale x 2 x i64> @rhaddu_v2i64(<vscale x 2 x i64> %s0, <vscale x 2 x i64> %s1) {
-; CHECK-LABEL: rhaddu_v2i64:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: urhadd z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
-entry:
- %s0s = zext <vscale x 2 x i64> %s0 to <vscale x 2 x i128>
- %s1s = zext <vscale x 2 x i64> %s1 to <vscale x 2 x i128>
- %add = add nuw nsw <vscale x 2 x i128> %s0s, shufflevector (<vscale x 2 x i128> insertelement (<vscale x 2 x i128> poison, i128 1, i32 0), <vscale x 2 x i128> poison, <vscale x 2 x i32> zeroinitializer)
- %add2 = add nuw nsw <vscale x 2 x i128> %add, %s1s
- %s = lshr <vscale x 2 x i128> %add2, shufflevector (<vscale x 2 x i128> insertelement (<vscale x 2 x i128> poison, i128 1, i32 0), <vscale x 2 x i128> poison, <vscale x 2 x i32> zeroinitializer)
- %result = trunc <vscale x 2 x i128> %s to <vscale x 2 x i64>
- ret <vscale x 2 x i64> %result
-}
-
-define <vscale x 2 x i32> @rhadds_v2i32(<vscale x 2 x i32> %s0, <vscale x 2 x i32> %s1) {
-; CHECK-LABEL: rhadds_v2i32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: mov z2.d, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: sxtw z0.d, p0/m, z0.d
-; CHECK-NEXT: sxtw z1.d, p0/m, z1.d
-; CHECK-NEXT: eor z0.d, z0.d, z2.d
-; CHECK-NEXT: sub z0.d, z1.d, z0.d
-; CHECK-NEXT: lsr z0.d, z0.d, #1
-; CHECK-NEXT: ret
-entry:
- %s0s = sext <vscale x 2 x i32> %s0 to <vscale x 2 x i64>
- %s1s = sext <vscale x 2 x i32> %s1 to <vscale x 2 x i64>
- %add = add <vscale x 2 x i64> %s0s, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
- %add2 = add <vscale x 2 x i64> %add, %s1s
- %s = lshr <vscale x 2 x i64> %add2, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
- %result = trunc <vscale x 2 x i64> %s to <vscale x 2 x i32>
- ret <vscale x 2 x i32> %result
-}
-
-define <vscale x 2 x i32> @rhaddu_v2i32(<vscale x 2 x i32> %s0, <vscale x 2 x i32> %s1) {
-; CHECK-LABEL: rhaddu_v2i32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: and z0.d, z0.d, #0xffffffff
-; CHECK-NEXT: and z1.d, z1.d, #0xffffffff
-; CHECK-NEXT: urhadd z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
-entry:
- %s0s = zext <vscale x 2 x i32> %s0 to <vscale x 2 x i64>
- %s1s = zext <vscale x 2 x i32> %s1 to <vscale x 2 x i64>
- %add = add nuw nsw <vscale x 2 x i64> %s0s, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
- %add2 = add nuw nsw <vscale x 2 x i64> %add, %s1s
- %s = lshr <vscale x 2 x i64> %add2, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
- %result = trunc <vscale x 2 x i64> %s to <vscale x 2 x i32>
- ret <vscale x 2 x i32> %result
-}
-
-define <vscale x 4 x i32> @rhadds_v4i32(<vscale x 4 x i32> %s0, <vscale x 4 x i32> %s1) {
-; CHECK-LABEL: rhadds_v4i32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: srhadd z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
-entry:
- %s0s = sext <vscale x 4 x i32> %s0 to <vscale x 4 x i64>
- %s1s = sext <vscale x 4 x i32> %s1 to <vscale x 4 x i64>
- %add = add <vscale x 4 x i64> %s0s, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
- %add2 = add <vscale x 4 x i64> %add, %s1s
- %s = lshr <vscale x 4 x i64> %add2, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
- %result = trunc <vscale x 4 x i64> %s to <vscale x 4 x i32>
- ret <vscale x 4 x i32> %result
-}
-
-define <vscale x 4 x i32> @rhaddu_v4i32(<vscale x 4 x i32> %s0, <vscale x 4 x i32> %s1) {
-; CHECK-LABEL: rhaddu_v4i32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: urhadd z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
-entry:
- %s0s = zext <vscale x 4 x i32> %s0 to <vscale x 4 x i64>
- %s1s = zext <vscale x 4 x i32> %s1 to <vscale x 4 x i64>
- %add = add nuw nsw <vscale x 4 x i64> %s0s, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
- %add2 = add nuw nsw <vscale x 4 x i64> %add, %s1s
- %s = lshr <vscale x 4 x i64> %add2, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
- %result = trunc <vscale x 4 x i64> %s to <vscale x 4 x i32>
- ret <vscale x 4 x i32> %result
-}
-
-define <vscale x 2 x i16> @rhadds_v2i16(<vscale x 2 x i16> %s0, <vscale x 2 x i16> %s1) {
-; CHECK-LABEL: rhadds_v2i16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: mov z2.d, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: sxth z0.d, p0/m, z0.d
-; CHECK-NEXT: sxth z1.d, p0/m, z1.d
-; CHECK-NEXT: eor z0.d, z0.d, z2.d
-; CHECK-NEXT: sub z0.d, z1.d, z0.d
-; CHECK-NEXT: and z0.d, z0.d, #0xffffffff
-; CHECK-NEXT: lsr z0.d, z0.d, #1
-; CHECK-NEXT: ret
-entry:
- %s0s = sext <vscale x 2 x i16> %s0 to <vscale x 2 x i32>
- %s1s = sext <vscale x 2 x i16> %s1 to <vscale x 2 x i32>
- %add = add <vscale x 2 x i32> %s0s, shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 1, i32 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer)
- %add2 = add <vscale x 2 x i32> %add, %s1s
- %s = lshr <vscale x 2 x i32> %add2, shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 1, i32 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer)
- %result = trunc <vscale x 2 x i32> %s to <vscale x 2 x i16>
- ret <vscale x 2 x i16> %result
-}
-
-define <vscale x 2 x i16> @rhaddu_v2i16(<vscale x 2 x i16> %s0, <vscale x 2 x i16> %s1) {
-; CHECK-LABEL: rhaddu_v2i16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov z2.d, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: and z0.d, z0.d, #0xffff
-; CHECK-NEXT: and z1.d, z1.d, #0xffff
-; CHECK-NEXT: eor z0.d, z0.d, z2.d
-; CHECK-NEXT: sub z0.d, z1.d, z0.d
-; CHECK-NEXT: lsr z0.d, z0.d, #1
-; CHECK-NEXT: ret
-entry:
- %s0s = zext <vscale x 2 x i16> %s0 to <vscale x 2 x i32>
- %s1s = zext <vscale x 2 x i16> %s1 to <vscale x 2 x i32>
- %add = add nuw nsw <vscale x 2 x i32> %s0s, shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 1, i32 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer)
- %add2 = add nuw nsw <vscale x 2 x i32> %add, %s1s
- %s = lshr <vscale x 2 x i32> %add2, shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 1, i32 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer)
- %result = trunc <vscale x 2 x i32> %s to <vscale x 2 x i16>
- ret <vscale x 2 x i16> %result
-}
-
-define <vscale x 4 x i16> @rhadds_v4i16(<vscale x 4 x i16> %s0, <vscale x 4 x i16> %s1) {
-; CHECK-LABEL: rhadds_v4i16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: mov z2.s, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: sxth z0.s, p0/m, z0.s
-; CHECK-NEXT: sxth z1.s, p0/m, z1.s
-; CHECK-NEXT: eor z0.d, z0.d, z2.d
-; CHECK-NEXT: sub z0.s, z1.s, z0.s
-; CHECK-NEXT: lsr z0.s, z0.s, #1
-; CHECK-NEXT: ret
-entry:
- %s0s = sext <vscale x 4 x i16> %s0 to <vscale x 4 x i32>
- %s1s = sext <vscale x 4 x i16> %s1 to <vscale x 4 x i32>
- %add = add <vscale x 4 x i32> %s0s, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
- %add2 = add <vscale x 4 x i32> %add, %s1s
- %s = lshr <vscale x 4 x i32> %add2, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
- %result = trunc <vscale x 4 x i32> %s to <vscale x 4 x i16>
- ret <vscale x 4 x i16> %result
-}
-
-define <vscale x 4 x i16> @rhaddu_v4i16(<vscale x 4 x i16> %s0, <vscale x 4 x i16> %s1) {
-; CHECK-LABEL: rhaddu_v4i16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: and z0.s, z0.s, #0xffff
-; CHECK-NEXT: and z1.s, z1.s, #0xffff
-; CHECK-NEXT: urhadd z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
-entry:
- %s0s = zext <vscale x 4 x i16> %s0 to <vscale x 4 x i32>
- %s1s = zext <vscale x 4 x i16> %s1 to <vscale x 4 x i32>
- %add = add nuw nsw <vscale x 4 x i32> %s0s, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
- %add2 = add nuw nsw <vscale x 4 x i32> %add, %s1s
- %s = lshr <vscale x 4 x i32> %add2, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
- %result = trunc <vscale x 4 x i32> %s to <vscale x 4 x i16>
- ret <vscale x 4 x i16> %result
-}
-
-define <vscale x 8 x i16> @rhadds_v8i16(<vscale x 8 x i16> %s0, <vscale x 8 x i16> %s1) {
-; CHECK-LABEL: rhadds_v8i16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: srhadd z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
-entry:
- %s0s = sext <vscale x 8 x i16> %s0 to <vscale x 8 x i32>
- %s1s = sext <vscale x 8 x i16> %s1 to <vscale x 8 x i32>
- %add = add <vscale x 8 x i32> %s0s, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 1, i32 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
- %add2 = add <vscale x 8 x i32> %add, %s1s
- %s = lshr <vscale x 8 x i32> %add2, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 1, i32 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
- %result = trunc <vscale x 8 x i32> %s to <vscale x 8 x i16>
- ret <vscale x 8 x i16> %result
-}
-
-define <vscale x 8 x i16> @rhaddu_v8i16(<vscale x 8 x i16> %s0, <vscale x 8 x i16> %s1) {
-; CHECK-LABEL: rhaddu_v8i16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: urhadd z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
-entry:
- %s0s = zext <vscale x 8 x i16> %s0 to <vscale x 8 x i32>
- %s1s = zext <vscale x 8 x i16> %s1 to <vscale x 8 x i32>
- %add = add nuw nsw <vscale x 8 x i32> %s0s, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 1, i32 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
- %add2 = add nuw nsw <vscale x 8 x i32> %add, %s1s
- %s = lshr <vscale x 8 x i32> %add2, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 1, i32 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
- %result = trunc <vscale x 8 x i32> %s to <vscale x 8 x i16>
- ret <vscale x 8 x i16> %result
-}
-
-define <vscale x 4 x i8> @rhadds_v4i8(<vscale x 4 x i8> %s0, <vscale x 4 x i8> %s1) {
-; CHECK-LABEL: rhadds_v4i8:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: mov z2.s, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: sxtb z0.s, p0/m, z0.s
-; CHECK-NEXT: sxtb z1.s, p0/m, z1.s
-; CHECK-NEXT: eor z0.d, z0.d, z2.d
-; CHECK-NEXT: sub z0.s, z1.s, z0.s
-; CHECK-NEXT: and z0.s, z0.s, #0xffff
-; CHECK-NEXT: lsr z0.s, z0.s, #1
-; CHECK-NEXT: ret
-entry:
- %s0s = sext <vscale x 4 x i8> %s0 to <vscale x 4 x i16>
- %s1s = sext <vscale x 4 x i8> %s1 to <vscale x 4 x i16>
- %add = add <vscale x 4 x i16> %s0s, shufflevector (<vscale x 4 x i16> insertelement (<vscale x 4 x i16> poison, i16 1, i32 0), <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer)
- %add2 = add <vscale x 4 x i16> %add, %s1s
- %s = lshr <vscale x 4 x i16> %add2, shufflevector (<vscale x 4 x i16> insertelement (<vscale x 4 x i16> poison, i16 1, i32 0), <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer)
- %result = trunc <vscale x 4 x i16> %s to <vscale x 4 x i8>
- ret <vscale x 4 x i8> %result
-}
-
-define <vscale x 4 x i8> @rhaddu_v4i8(<vscale x 4 x i8> %s0, <vscale x 4 x i8> %s1) {
-; CHECK-LABEL: rhaddu_v4i8:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov z2.s, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: and z0.s, z0.s, #0xff
-; CHECK-NEXT: and z1.s, z1.s, #0xff
-; CHECK-NEXT: eor z0.d, z0.d, z2.d
-; CHECK-NEXT: sub z0.s, z1.s, z0.s
-; CHECK-NEXT: lsr z0.s, z0.s, #1
-; CHECK-NEXT: ret
-entry:
- %s0s = zext <vscale x 4 x i8> %s0 to <vscale x 4 x i16>
- %s1s = zext <vscale x 4 x i8> %s1 to <vscale x 4 x i16>
- %add = add nuw nsw <vscale x 4 x i16> %s0s, shufflevector (<vscale x 4 x i16> insertelement (<vscale x 4 x i16> poison, i16 1, i32 0), <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer)
- %add2 = add nuw nsw <vscale x 4 x i16> %add, %s1s
- %s = lshr <vscale x 4 x i16> %add2, shufflevector (<vscale x 4 x i16> insertelement (<vscale x 4 x i16> poison, i16 1, i32 0), <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer)
- %result = trunc <vscale x 4 x i16> %s to <vscale x 4 x i8>
- ret <vscale x 4 x i8> %result
-}
-
-define <vscale x 8 x i8> @rhadds_v8i8(<vscale x 8 x i8> %s0, <vscale x 8 x i8> %s1) {
-; CHECK-LABEL: rhadds_v8i8:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: mov z2.h, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: sxtb z0.h, p0/m, z0.h
-; CHECK-NEXT: sxtb z1.h, p0/m, z1.h
-; CHECK-NEXT: eor z0.d, z0.d, z2.d
-; CHECK-NEXT: sub z0.h, z1.h, z0.h
-; CHECK-NEXT: lsr z0.h, z0.h, #1
-; CHECK-NEXT: ret
-entry:
- %s0s = sext <vscale x 8 x i8> %s0 to <vscale x 8 x i16>
- %s1s = sext <vscale x 8 x i8> %s1 to <vscale x 8 x i16>
- %add = add <vscale x 8 x i16> %s0s, shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 1, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)
- %add2 = add <vscale x 8 x i16> %add, %s1s
- %s = lshr <vscale x 8 x i16> %add2, shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 1, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)
- %result = trunc <vscale x 8 x i16> %s to <vscale x 8 x i8>
- ret <vscale x 8 x i8> %result
-}
-
-define <vscale x 8 x i8> @rhaddu_v8i8(<vscale x 8 x i8> %s0, <vscale x 8 x i8> %s1) {
-; CHECK-LABEL: rhaddu_v8i8:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: and z0.h, z0.h, #0xff
-; CHECK-NEXT: and z1.h, z1.h, #0xff
-; CHECK-NEXT: urhadd z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
-entry:
- %s0s = zext <vscale x 8 x i8> %s0 to <vscale x 8 x i16>
- %s1s = zext <vscale x 8 x i8> %s1 to <vscale x 8 x i16>
- %add = add nuw nsw <vscale x 8 x i16> %s0s, shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 1, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)
- %add2 = add nuw nsw <vscale x 8 x i16> %add, %s1s
- %s = lshr <vscale x 8 x i16> %add2, shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 1, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)
- %result = trunc <vscale x 8 x i16> %s to <vscale x 8 x i8>
- ret <vscale x 8 x i8> %result
-}
-
-define <vscale x 16 x i8> @rhadds_v16i8(<vscale x 16 x i8> %s0, <vscale x 16 x i8> %s1) {
-; CHECK-LABEL: rhadds_v16i8:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: srhadd z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT: ret
-entry:
- %s0s = sext <vscale x 16 x i8> %s0 to <vscale x 16 x i16>
- %s1s = sext <vscale x 16 x i8> %s1 to <vscale x 16 x i16>
- %add = add <vscale x 16 x i16> %s0s, shufflevector (<vscale x 16 x i16> insertelement (<vscale x 16 x i16> poison, i16 1, i32 0), <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer)
- %add2 = add <vscale x 16 x i16> %add, %s1s
- %s = lshr <vscale x 16 x i16> %add2, shufflevector (<vscale x 16 x i16> insertelement (<vscale x 16 x i16> poison, i16 1, i32 0), <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer)
- %result = trunc <vscale x 16 x i16> %s to <vscale x 16 x i8>
- ret <vscale x 16 x i8> %result
-}
-
-define <vscale x 16 x i8> @rhaddu_v16i8(<vscale x 16 x i8> %s0, <vscale x 16 x i8> %s1) {
-; CHECK-LABEL: rhaddu_v16i8:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: urhadd z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT: ret
-entry:
- %s0s = zext <vscale x 16 x i8> %s0 to <vscale x 16 x i16>
- %s1s = zext <vscale x 16 x i8> %s1 to <vscale x 16 x i16>
- %add = add nuw nsw <vscale x 16 x i16> %s0s, shufflevector (<vscale x 16 x i16> insertelement (<vscale x 16 x i16> poison, i16 1, i32 0), <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer)
- %add2 = add nuw nsw <vscale x 16 x i16> %add, %s1s
- %s = lshr <vscale x 16 x i16> %add2, shufflevector (<vscale x 16 x i16> insertelement (<vscale x 16 x i16> poison, i16 1, i32 0), <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer)
- %result = trunc <vscale x 16 x i16> %s to <vscale x 16 x i8>
- ret <vscale x 16 x i8> %result
-}
More information about the llvm-commits
mailing list