[llvm] 40a51e1 - [AArch64][SVE]: custom lower AVGFloor/AVGCeil.

Hassnaa Hamdi via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 13 12:01:07 PDT 2023


Author: Hassnaa Hamdi
Date: 2023-03-13T19:00:57Z
New Revision: 40a51e1afce989c2adc503cbca2a6cce4a587d84

URL: https://github.com/llvm/llvm-project/commit/40a51e1afce989c2adc503cbca2a6cce4a587d84
DIFF: https://github.com/llvm/llvm-project/commit/40a51e1afce989c2adc503cbca2a6cce4a587d84.diff

LOG: [AArch64][SVE]: custom lower AVGFloor/AVGCeil.

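-Lower AVGFloor(A, B) to:
 SRL(A, 1) + SRL(B, 1) + ((A & B) & 1)   (SRA instead of SRL for the signed form).
-Lower AVGCeil(A, B) to:
 SRL(A, 1) + SRL(B, 1) + ((A | B) & 1)   (SRA instead of SRL for the signed form).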

Differential Revision: https://reviews.llvm.org/D143283

Added: 
    llvm/test/CodeGen/AArch64/sve-hadd.ll

Modified: 
    llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
    llvm/lib/Target/AArch64/AArch64ISelLowering.h

Removed: 
    llvm/test/CodeGen/AArch64/sve2-hadd.ll


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 4830f12c12c57..f4f16a2e0fb10 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1302,12 +1302,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
       setOperationAction(ISD::SDIVREM, VT, Expand);
       setOperationAction(ISD::UDIVREM, VT, Expand);
 
-      if (Subtarget->hasSVE2()) {
-        setOperationAction(ISD::AVGFLOORS, VT, Custom);
-        setOperationAction(ISD::AVGFLOORU, VT, Custom);
-        setOperationAction(ISD::AVGCEILS, VT, Custom);
-        setOperationAction(ISD::AVGCEILU, VT, Custom);
-      }
+      setOperationAction(ISD::AVGFLOORS, VT, Custom);
+      setOperationAction(ISD::AVGFLOORU, VT, Custom);
+      setOperationAction(ISD::AVGCEILS, VT, Custom);
+      setOperationAction(ISD::AVGCEILU, VT, Custom);
     }
 
     // Illegal unpacked integer vector types.
@@ -5977,13 +5975,13 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
   case ISD::ABDU:
     return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABDU_PRED);
   case ISD::AVGFLOORS:
-    return LowerToPredicatedOp(Op, DAG, AArch64ISD::HADDS_PRED);
+    return LowerAVG(Op, DAG, AArch64ISD::HADDS_PRED);
   case ISD::AVGFLOORU:
-    return LowerToPredicatedOp(Op, DAG, AArch64ISD::HADDU_PRED);
+    return LowerAVG(Op, DAG, AArch64ISD::HADDU_PRED);
   case ISD::AVGCEILS:
-    return LowerToPredicatedOp(Op, DAG, AArch64ISD::RHADDS_PRED);
+    return LowerAVG(Op, DAG, AArch64ISD::RHADDS_PRED);
   case ISD::AVGCEILU:
-    return LowerToPredicatedOp(Op, DAG, AArch64ISD::RHADDU_PRED);
+    return LowerAVG(Op, DAG, AArch64ISD::RHADDU_PRED);
   case ISD::BITREVERSE:
     return LowerBitreverse(Op, DAG);
   case ISD::BSWAP:
@@ -13244,6 +13242,57 @@ SDValue AArch64TargetLowering::LowerWindowsDYNAMIC_STACKALLOC(
   return Chain;
 }
 
+// With SVE2, emit predicated NewOp. Else, if both inputs have a spare top bit:
+//   avgfloor(x, y) -> (x + y) >> 1
+//   avgceil(x, y)  -> (x + y + 1) >> 1
+// (unsigned: top bit known zero; signed: more than one sign bit.)
+// Otherwise, lower to (>> is SRA for signed ops, SRL for unsigned ops):
+//   avgfloor(x, y) -> (x >> 1) + (y >> 1) + (x & y & 1)
+//   avgceil(x, y)  -> (x >> 1) + (y >> 1) + ((x | y) & 1)
+SDValue AArch64TargetLowering::LowerAVG(SDValue Op, SelectionDAG &DAG,
+                                        unsigned NewOp) const {
+  if (Subtarget->hasSVE2())
+    return LowerToPredicatedOp(Op, DAG, NewOp);
+
+  SDLoc dl(Op);
+  SDValue OpA = Op->getOperand(0);
+  SDValue OpB = Op->getOperand(1);
+  EVT VT = Op.getValueType();
+  bool IsCeil =
+      (Op->getOpcode() == ISD::AVGCEILS || Op->getOpcode() == ISD::AVGCEILU);
+  bool IsSigned =
+      (Op->getOpcode() == ISD::AVGFLOORS || Op->getOpcode() == ISD::AVGCEILS);
+  unsigned ShiftOpc = IsSigned ? ISD::SRA : ISD::SRL;
+
+  assert(VT.isScalableVector() && "Only expect to lower scalable vector op!");
+
+  auto IsZeroExtended = [&DAG](SDValue &Node) {
+    KnownBits Known = DAG.computeKnownBits(Node, 0);
+    return Known.Zero.isSignBitSet(); // top bit known to be 0
+  };
+
+  auto IsSignExtended = [&DAG](SDValue &Node) {
+    return (DAG.ComputeNumSignBits(Node, 0) > 1); // >= 2 sign-bit copies
+  };
+
+  SDValue ConstantOne = DAG.getConstant(1, dl, VT);
+  if ((!IsSigned && IsZeroExtended(OpA) && IsZeroExtended(OpB)) ||
+      (IsSigned && IsSignExtended(OpA) && IsSignExtended(OpB))) {
+    SDValue Add = DAG.getNode(ISD::ADD, dl, VT, OpA, OpB);
+    if (IsCeil)
+      Add = DAG.getNode(ISD::ADD, dl, VT, Add, ConstantOne);
+    return DAG.getNode(ShiftOpc, dl, VT, Add, ConstantOne);
+  }
+
+  SDValue ShiftOpA = DAG.getNode(ShiftOpc, dl, VT, OpA, ConstantOne);
+  SDValue ShiftOpB = DAG.getNode(ShiftOpc, dl, VT, OpB, ConstantOne);
+
+  SDValue tmp = DAG.getNode(IsCeil ? ISD::OR : ISD::AND, dl, VT, OpA, OpB);
+  tmp = DAG.getNode(ISD::AND, dl, VT, tmp, ConstantOne); // keep bit 0 only
+  SDValue Add = DAG.getNode(ISD::ADD, dl, VT, ShiftOpA, ShiftOpB);
+  return DAG.getNode(ISD::ADD, dl, VT, Add, tmp);
+}
+
 SDValue
 AArch64TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
                                                SelectionDAG &DAG) const {

diff  --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index de87d0c0c2f57..c1b212776fbb7 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -1090,6 +1090,7 @@ class AArch64TargetLowering : public TargetLowering {
   SDValue LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, SDValue Chain,
                                          SDValue &Size,
                                          SelectionDAG &DAG) const;
+  SDValue LowerAVG(SDValue Op, SelectionDAG &DAG, unsigned NewOp) const;
 
   SDValue LowerFixedLengthVectorIntDivideToSVE(SDValue Op,
                                                SelectionDAG &DAG) const;

diff  --git a/llvm/test/CodeGen/AArch64/sve-hadd.ll b/llvm/test/CodeGen/AArch64/sve-hadd.ll
new file mode 100644
index 0000000000000..82f9e51d9ca2e
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-hadd.ll
@@ -0,0 +1,1295 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64-none-linux-gnu  -mattr=+sve | FileCheck %s -check-prefixes=CHECK,SVE
+; RUN: llc < %s -mtriple=aarch64-none-linux-gnu  -mattr=+sve2 | FileCheck %s -check-prefixes=CHECK,SVE2
+
+define <vscale x 2 x i64> @hadds_v2i64(<vscale x 2 x i64> %s0, <vscale x 2 x i64> %s1) {
+; SVE-LABEL: hadds_v2i64:
+; SVE:       // %bb.0: // %entry
+; SVE-NEXT:    asr z2.d, z1.d, #1
+; SVE-NEXT:    asr z3.d, z0.d, #1
+; SVE-NEXT:    and z0.d, z0.d, z1.d
+; SVE-NEXT:    add z1.d, z3.d, z2.d
+; SVE-NEXT:    and z0.d, z0.d, #0x1
+; SVE-NEXT:    add z0.d, z1.d, z0.d
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: hadds_v2i64:
+; SVE2:       // %bb.0: // %entry
+; SVE2-NEXT:    ptrue p0.d
+; SVE2-NEXT:    shadd z0.d, p0/m, z0.d, z1.d
+; SVE2-NEXT:    ret
+entry:
+  %s0s = sext <vscale x 2 x i64> %s0 to <vscale x 2 x i128>
+  %s1s = sext <vscale x 2 x i64> %s1 to <vscale x 2 x i128>
+  %m = add nsw <vscale x 2 x i128> %s0s, %s1s
+  %s = ashr <vscale x 2 x i128> %m, shufflevector (<vscale x 2 x i128> insertelement (<vscale x 2 x i128> poison, i128 1, i32 0), <vscale x 2 x i128> poison, <vscale x 2 x i32> zeroinitializer)
+  %s2 = trunc <vscale x 2 x i128> %s to <vscale x 2 x i64>
+  ret <vscale x 2 x i64> %s2
+}
+
+define <vscale x 2 x i64> @hadds_v2i64_lsh(<vscale x 2 x i64> %s0, <vscale x 2 x i64> %s1) {
+; SVE-LABEL: hadds_v2i64_lsh:
+; SVE:       // %bb.0: // %entry
+; SVE-NEXT:    asr z2.d, z1.d, #1
+; SVE-NEXT:    asr z3.d, z0.d, #1
+; SVE-NEXT:    and z0.d, z0.d, z1.d
+; SVE-NEXT:    add z1.d, z3.d, z2.d
+; SVE-NEXT:    and z0.d, z0.d, #0x1
+; SVE-NEXT:    add z0.d, z1.d, z0.d
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: hadds_v2i64_lsh:
+; SVE2:       // %bb.0: // %entry
+; SVE2-NEXT:    ptrue p0.d
+; SVE2-NEXT:    shadd z0.d, p0/m, z0.d, z1.d
+; SVE2-NEXT:    ret
+entry:
+  %s0s = sext <vscale x 2 x i64> %s0 to <vscale x 2 x i128>
+  %s1s = sext <vscale x 2 x i64> %s1 to <vscale x 2 x i128>
+  %m = add nsw <vscale x 2 x i128> %s0s, %s1s
+  %s = lshr <vscale x 2 x i128> %m, shufflevector (<vscale x 2 x i128> insertelement (<vscale x 2 x i128> poison, i128 1, i32 0), <vscale x 2 x i128> poison, <vscale x 2 x i32> zeroinitializer)
+  %s2 = trunc <vscale x 2 x i128> %s to <vscale x 2 x i64>
+  ret <vscale x 2 x i64> %s2
+}
+
+define <vscale x 2 x i64> @haddu_v2i64(<vscale x 2 x i64> %s0, <vscale x 2 x i64> %s1) {
+; SVE-LABEL: haddu_v2i64:
+; SVE:       // %bb.0: // %entry
+; SVE-NEXT:    lsr z2.d, z1.d, #1
+; SVE-NEXT:    lsr z3.d, z0.d, #1
+; SVE-NEXT:    and z0.d, z0.d, z1.d
+; SVE-NEXT:    add z1.d, z3.d, z2.d
+; SVE-NEXT:    and z0.d, z0.d, #0x1
+; SVE-NEXT:    add z0.d, z1.d, z0.d
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: haddu_v2i64:
+; SVE2:       // %bb.0: // %entry
+; SVE2-NEXT:    ptrue p0.d
+; SVE2-NEXT:    uhadd z0.d, p0/m, z0.d, z1.d
+; SVE2-NEXT:    ret
+entry:
+  %s0s = zext <vscale x 2 x i64> %s0 to <vscale x 2 x i128>
+  %s1s = zext <vscale x 2 x i64> %s1 to <vscale x 2 x i128>
+  %m = add nuw nsw <vscale x 2 x i128> %s0s, %s1s
+  %s = lshr <vscale x 2 x i128> %m, shufflevector (<vscale x 2 x i128> insertelement (<vscale x 2 x i128> poison, i128 1, i32 0), <vscale x 2 x i128> poison, <vscale x 2 x i32> zeroinitializer)
+  %s2 = trunc <vscale x 2 x i128> %s to <vscale x 2 x i64>
+  ret <vscale x 2 x i64> %s2
+}
+
+define <vscale x 2 x i32> @hadds_v2i32(<vscale x 2 x i32> %s0, <vscale x 2 x i32> %s1) {
+; SVE-LABEL: hadds_v2i32:
+; SVE:       // %bb.0: // %entry
+; SVE-NEXT:    ptrue p0.d
+; SVE-NEXT:    sxtw z0.d, p0/m, z0.d
+; SVE-NEXT:    adr z0.d, [z0.d, z1.d, sxtw]
+; SVE-NEXT:    asr z0.d, z0.d, #1
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: hadds_v2i32:
+; SVE2:       // %bb.0: // %entry
+; SVE2-NEXT:    ptrue p0.d
+; SVE2-NEXT:    sxtw z0.d, p0/m, z0.d
+; SVE2-NEXT:    sxtw z1.d, p0/m, z1.d
+; SVE2-NEXT:    shadd z0.d, p0/m, z0.d, z1.d
+; SVE2-NEXT:    ret
+entry:
+  %s0s = sext <vscale x 2 x i32> %s0 to <vscale x 2 x i64>
+  %s1s = sext <vscale x 2 x i32> %s1 to <vscale x 2 x i64>
+  %m = add nsw <vscale x 2 x i64> %s0s, %s1s
+  %s = ashr <vscale x 2 x i64> %m, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+  %s2 = trunc <vscale x 2 x i64> %s to <vscale x 2 x i32>
+  ret <vscale x 2 x i32> %s2
+}
+
+define <vscale x 2 x i32> @hadds_v2i32_lsh(<vscale x 2 x i32> %s0, <vscale x 2 x i32> %s1) {
+; CHECK-LABEL: hadds_v2i32_lsh:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    sxtw z0.d, p0/m, z0.d
+; CHECK-NEXT:    adr z0.d, [z0.d, z1.d, sxtw]
+; CHECK-NEXT:    lsr z0.d, z0.d, #1
+; CHECK-NEXT:    ret
+entry:
+  %s0s = sext <vscale x 2 x i32> %s0 to <vscale x 2 x i64>
+  %s1s = sext <vscale x 2 x i32> %s1 to <vscale x 2 x i64>
+  %m = add nsw <vscale x 2 x i64> %s0s, %s1s
+  %s = lshr <vscale x 2 x i64> %m, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+  %s2 = trunc <vscale x 2 x i64> %s to <vscale x 2 x i32>
+  ret <vscale x 2 x i32> %s2
+}
+
+define <vscale x 2 x i32> @haddu_v2i32(<vscale x 2 x i32> %s0, <vscale x 2 x i32> %s1) {
+; SVE-LABEL: haddu_v2i32:
+; SVE:       // %bb.0: // %entry
+; SVE-NEXT:    and z0.d, z0.d, #0xffffffff
+; SVE-NEXT:    adr z0.d, [z0.d, z1.d, uxtw]
+; SVE-NEXT:    lsr z0.d, z0.d, #1
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: haddu_v2i32:
+; SVE2:       // %bb.0: // %entry
+; SVE2-NEXT:    ptrue p0.d
+; SVE2-NEXT:    and z0.d, z0.d, #0xffffffff
+; SVE2-NEXT:    and z1.d, z1.d, #0xffffffff
+; SVE2-NEXT:    uhadd z0.d, p0/m, z0.d, z1.d
+; SVE2-NEXT:    ret
+entry:
+  %s0s = zext <vscale x 2 x i32> %s0 to <vscale x 2 x i64>
+  %s1s = zext <vscale x 2 x i32> %s1 to <vscale x 2 x i64>
+  %m = add nuw nsw <vscale x 2 x i64> %s0s, %s1s
+  %s = lshr <vscale x 2 x i64> %m, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+  %s2 = trunc <vscale x 2 x i64> %s to <vscale x 2 x i32>
+  ret <vscale x 2 x i32> %s2
+}
+
+define <vscale x 4 x i32> @hadds_v4i32(<vscale x 4 x i32> %s0, <vscale x 4 x i32> %s1) {
+; SVE-LABEL: hadds_v4i32:
+; SVE:       // %bb.0: // %entry
+; SVE-NEXT:    asr z2.s, z1.s, #1
+; SVE-NEXT:    asr z3.s, z0.s, #1
+; SVE-NEXT:    and z0.d, z0.d, z1.d
+; SVE-NEXT:    add z1.s, z3.s, z2.s
+; SVE-NEXT:    and z0.s, z0.s, #0x1
+; SVE-NEXT:    add z0.s, z1.s, z0.s
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: hadds_v4i32:
+; SVE2:       // %bb.0: // %entry
+; SVE2-NEXT:    ptrue p0.s
+; SVE2-NEXT:    shadd z0.s, p0/m, z0.s, z1.s
+; SVE2-NEXT:    ret
+entry:
+  %s0s = sext <vscale x 4 x i32> %s0 to <vscale x 4 x i64>
+  %s1s = sext <vscale x 4 x i32> %s1 to <vscale x 4 x i64>
+  %m = add nsw <vscale x 4 x i64> %s0s, %s1s
+  %s = ashr <vscale x 4 x i64> %m, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
+  %s2 = trunc <vscale x 4 x i64> %s to <vscale x 4 x i32>
+  ret <vscale x 4 x i32> %s2
+}
+
+define <vscale x 4 x i32> @hadds_v4i32_lsh(<vscale x 4 x i32> %s0, <vscale x 4 x i32> %s1) {
+; SVE-LABEL: hadds_v4i32_lsh:
+; SVE:       // %bb.0: // %entry
+; SVE-NEXT:    asr z2.s, z1.s, #1
+; SVE-NEXT:    asr z3.s, z0.s, #1
+; SVE-NEXT:    and z0.d, z0.d, z1.d
+; SVE-NEXT:    add z1.s, z3.s, z2.s
+; SVE-NEXT:    and z0.s, z0.s, #0x1
+; SVE-NEXT:    add z0.s, z1.s, z0.s
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: hadds_v4i32_lsh:
+; SVE2:       // %bb.0: // %entry
+; SVE2-NEXT:    ptrue p0.s
+; SVE2-NEXT:    shadd z0.s, p0/m, z0.s, z1.s
+; SVE2-NEXT:    ret
+entry:
+  %s0s = sext <vscale x 4 x i32> %s0 to <vscale x 4 x i64>
+  %s1s = sext <vscale x 4 x i32> %s1 to <vscale x 4 x i64>
+  %m = add nsw <vscale x 4 x i64> %s0s, %s1s
+  %s = lshr <vscale x 4 x i64> %m, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
+  %s2 = trunc <vscale x 4 x i64> %s to <vscale x 4 x i32>
+  ret <vscale x 4 x i32> %s2
+}
+
+define <vscale x 4 x i32> @haddu_v4i32(<vscale x 4 x i32> %s0, <vscale x 4 x i32> %s1) {
+; SVE-LABEL: haddu_v4i32:
+; SVE:       // %bb.0: // %entry
+; SVE-NEXT:    lsr z2.s, z1.s, #1
+; SVE-NEXT:    lsr z3.s, z0.s, #1
+; SVE-NEXT:    and z0.d, z0.d, z1.d
+; SVE-NEXT:    add z1.s, z3.s, z2.s
+; SVE-NEXT:    and z0.s, z0.s, #0x1
+; SVE-NEXT:    add z0.s, z1.s, z0.s
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: haddu_v4i32:
+; SVE2:       // %bb.0: // %entry
+; SVE2-NEXT:    ptrue p0.s
+; SVE2-NEXT:    uhadd z0.s, p0/m, z0.s, z1.s
+; SVE2-NEXT:    ret
+entry:
+  %s0s = zext <vscale x 4 x i32> %s0 to <vscale x 4 x i64>
+  %s1s = zext <vscale x 4 x i32> %s1 to <vscale x 4 x i64>
+  %m = add nuw nsw <vscale x 4 x i64> %s0s, %s1s
+  %s = lshr <vscale x 4 x i64> %m, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
+  %s2 = trunc <vscale x 4 x i64> %s to <vscale x 4 x i32>
+  ret <vscale x 4 x i32> %s2
+}
+
+define <vscale x 2 x i16> @hadds_v2i16(<vscale x 2 x i16> %s0, <vscale x 2 x i16> %s1) {
+; SVE-LABEL: hadds_v2i16:
+; SVE:       // %bb.0: // %entry
+; SVE-NEXT:    ptrue p0.d
+; SVE-NEXT:    sxth z0.d, p0/m, z0.d
+; SVE-NEXT:    sxth z1.d, p0/m, z1.d
+; SVE-NEXT:    add z0.d, z0.d, z1.d
+; SVE-NEXT:    asr z0.d, z0.d, #1
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: hadds_v2i16:
+; SVE2:       // %bb.0: // %entry
+; SVE2-NEXT:    ptrue p0.d
+; SVE2-NEXT:    sxth z0.d, p0/m, z0.d
+; SVE2-NEXT:    sxth z1.d, p0/m, z1.d
+; SVE2-NEXT:    shadd z0.d, p0/m, z0.d, z1.d
+; SVE2-NEXT:    ret
+entry:
+  %s0s = sext <vscale x 2 x i16> %s0 to <vscale x 2 x i32>
+  %s1s = sext <vscale x 2 x i16> %s1 to <vscale x 2 x i32>
+  %m = add nsw <vscale x 2 x i32> %s0s, %s1s
+  %s = ashr <vscale x 2 x i32> %m, shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 1, i32 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer)
+  %s2 = trunc <vscale x 2 x i32> %s to <vscale x 2 x i16>
+  ret <vscale x 2 x i16> %s2
+}
+
+define <vscale x 2 x i16> @hadds_v2i16_lsh(<vscale x 2 x i16> %s0, <vscale x 2 x i16> %s1) {
+; CHECK-LABEL: hadds_v2i16_lsh:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    sxth z0.d, p0/m, z0.d
+; CHECK-NEXT:    sxth z1.d, p0/m, z1.d
+; CHECK-NEXT:    add z0.d, z0.d, z1.d
+; CHECK-NEXT:    and z0.d, z0.d, #0xffffffff
+; CHECK-NEXT:    lsr z0.d, z0.d, #1
+; CHECK-NEXT:    ret
+entry:
+  %s0s = sext <vscale x 2 x i16> %s0 to <vscale x 2 x i32>
+  %s1s = sext <vscale x 2 x i16> %s1 to <vscale x 2 x i32>
+  %m = add nsw <vscale x 2 x i32> %s0s, %s1s
+  %s = lshr <vscale x 2 x i32> %m, shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 1, i32 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer)
+  %s2 = trunc <vscale x 2 x i32> %s to <vscale x 2 x i16>
+  ret <vscale x 2 x i16> %s2
+}
+
+define <vscale x 2 x i16> @haddu_v2i16(<vscale x 2 x i16> %s0, <vscale x 2 x i16> %s1) {
+; SVE-LABEL: haddu_v2i16:
+; SVE:       // %bb.0: // %entry
+; SVE-NEXT:    and z0.d, z0.d, #0xffff
+; SVE-NEXT:    and z1.d, z1.d, #0xffff
+; SVE-NEXT:    add z0.d, z0.d, z1.d
+; SVE-NEXT:    lsr z0.d, z0.d, #1
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: haddu_v2i16:
+; SVE2:       // %bb.0: // %entry
+; SVE2-NEXT:    ptrue p0.d
+; SVE2-NEXT:    and z0.d, z0.d, #0xffff
+; SVE2-NEXT:    and z1.d, z1.d, #0xffff
+; SVE2-NEXT:    uhadd z0.d, p0/m, z0.d, z1.d
+; SVE2-NEXT:    ret
+entry:
+  %s0s = zext <vscale x 2 x i16> %s0 to <vscale x 2 x i32>
+  %s1s = zext <vscale x 2 x i16> %s1 to <vscale x 2 x i32>
+  %m = add nuw nsw <vscale x 2 x i32> %s0s, %s1s
+  %s = lshr <vscale x 2 x i32> %m, shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 1, i32 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer)
+  %s2 = trunc <vscale x 2 x i32> %s to <vscale x 2 x i16>
+  ret <vscale x 2 x i16> %s2
+}
+
+define <vscale x 4 x i16> @hadds_v4i16(<vscale x 4 x i16> %s0, <vscale x 4 x i16> %s1) {
+; SVE-LABEL: hadds_v4i16:
+; SVE:       // %bb.0: // %entry
+; SVE-NEXT:    ptrue p0.s
+; SVE-NEXT:    sxth z0.s, p0/m, z0.s
+; SVE-NEXT:    sxth z1.s, p0/m, z1.s
+; SVE-NEXT:    add z0.s, z0.s, z1.s
+; SVE-NEXT:    asr z0.s, z0.s, #1
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: hadds_v4i16:
+; SVE2:       // %bb.0: // %entry
+; SVE2-NEXT:    ptrue p0.s
+; SVE2-NEXT:    sxth z0.s, p0/m, z0.s
+; SVE2-NEXT:    sxth z1.s, p0/m, z1.s
+; SVE2-NEXT:    shadd z0.s, p0/m, z0.s, z1.s
+; SVE2-NEXT:    ret
+entry:
+  %s0s = sext <vscale x 4 x i16> %s0 to <vscale x 4 x i32>
+  %s1s = sext <vscale x 4 x i16> %s1 to <vscale x 4 x i32>
+  %m = add nsw <vscale x 4 x i32> %s0s, %s1s
+  %s = ashr <vscale x 4 x i32> %m, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+  %s2 = trunc <vscale x 4 x i32> %s to <vscale x 4 x i16>
+  ret <vscale x 4 x i16> %s2
+}
+
+define <vscale x 4 x i16> @hadds_v4i16_lsh(<vscale x 4 x i16> %s0, <vscale x 4 x i16> %s1) {
+; CHECK-LABEL: hadds_v4i16_lsh:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    sxth z0.s, p0/m, z0.s
+; CHECK-NEXT:    sxth z1.s, p0/m, z1.s
+; CHECK-NEXT:    add z0.s, z0.s, z1.s
+; CHECK-NEXT:    lsr z0.s, z0.s, #1
+; CHECK-NEXT:    ret
+entry:
+  %s0s = sext <vscale x 4 x i16> %s0 to <vscale x 4 x i32>
+  %s1s = sext <vscale x 4 x i16> %s1 to <vscale x 4 x i32>
+  %m = add nsw <vscale x 4 x i32> %s0s, %s1s
+  %s = lshr <vscale x 4 x i32> %m, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+  %s2 = trunc <vscale x 4 x i32> %s to <vscale x 4 x i16>
+  ret <vscale x 4 x i16> %s2
+}
+
+define <vscale x 4 x i16> @haddu_v4i16(<vscale x 4 x i16> %s0, <vscale x 4 x i16> %s1) {
+; SVE-LABEL: haddu_v4i16:
+; SVE:       // %bb.0: // %entry
+; SVE-NEXT:    and z0.s, z0.s, #0xffff
+; SVE-NEXT:    and z1.s, z1.s, #0xffff
+; SVE-NEXT:    add z0.s, z0.s, z1.s
+; SVE-NEXT:    lsr z0.s, z0.s, #1
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: haddu_v4i16:
+; SVE2:       // %bb.0: // %entry
+; SVE2-NEXT:    ptrue p0.s
+; SVE2-NEXT:    and z0.s, z0.s, #0xffff
+; SVE2-NEXT:    and z1.s, z1.s, #0xffff
+; SVE2-NEXT:    uhadd z0.s, p0/m, z0.s, z1.s
+; SVE2-NEXT:    ret
+entry:
+  %s0s = zext <vscale x 4 x i16> %s0 to <vscale x 4 x i32>
+  %s1s = zext <vscale x 4 x i16> %s1 to <vscale x 4 x i32>
+  %m = add nuw nsw <vscale x 4 x i32> %s0s, %s1s
+  %s = lshr <vscale x 4 x i32> %m, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+  %s2 = trunc <vscale x 4 x i32> %s to <vscale x 4 x i16>
+  ret <vscale x 4 x i16> %s2
+}
+
+define <vscale x 8 x i16> @hadds_v8i16(<vscale x 8 x i16> %s0, <vscale x 8 x i16> %s1) {
+; SVE-LABEL: hadds_v8i16:
+; SVE:       // %bb.0: // %entry
+; SVE-NEXT:    asr z2.h, z1.h, #1
+; SVE-NEXT:    asr z3.h, z0.h, #1
+; SVE-NEXT:    and z0.d, z0.d, z1.d
+; SVE-NEXT:    add z1.h, z3.h, z2.h
+; SVE-NEXT:    and z0.h, z0.h, #0x1
+; SVE-NEXT:    add z0.h, z1.h, z0.h
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: hadds_v8i16:
+; SVE2:       // %bb.0: // %entry
+; SVE2-NEXT:    ptrue p0.h
+; SVE2-NEXT:    shadd z0.h, p0/m, z0.h, z1.h
+; SVE2-NEXT:    ret
+entry:
+  %s0s = sext <vscale x 8 x i16> %s0 to <vscale x 8 x i32>
+  %s1s = sext <vscale x 8 x i16> %s1 to <vscale x 8 x i32>
+  %m = add nsw <vscale x 8 x i32> %s0s, %s1s
+  %s = ashr <vscale x 8 x i32> %m, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 1, i32 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
+  %s2 = trunc <vscale x 8 x i32> %s to <vscale x 8 x i16>
+  ret <vscale x 8 x i16> %s2
+}
+
+define <vscale x 8 x i16> @hadds_v8i16_lsh(<vscale x 8 x i16> %s0, <vscale x 8 x i16> %s1) {
+; SVE-LABEL: hadds_v8i16_lsh:
+; SVE:       // %bb.0: // %entry
+; SVE-NEXT:    asr z2.h, z1.h, #1
+; SVE-NEXT:    asr z3.h, z0.h, #1
+; SVE-NEXT:    and z0.d, z0.d, z1.d
+; SVE-NEXT:    add z1.h, z3.h, z2.h
+; SVE-NEXT:    and z0.h, z0.h, #0x1
+; SVE-NEXT:    add z0.h, z1.h, z0.h
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: hadds_v8i16_lsh:
+; SVE2:       // %bb.0: // %entry
+; SVE2-NEXT:    ptrue p0.h
+; SVE2-NEXT:    shadd z0.h, p0/m, z0.h, z1.h
+; SVE2-NEXT:    ret
+entry:
+  %s0s = sext <vscale x 8 x i16> %s0 to <vscale x 8 x i32>
+  %s1s = sext <vscale x 8 x i16> %s1 to <vscale x 8 x i32>
+  %m = add nsw <vscale x 8 x i32> %s0s, %s1s
+  %s = lshr <vscale x 8 x i32> %m, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 1, i32 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
+  %s2 = trunc <vscale x 8 x i32> %s to <vscale x 8 x i16>
+  ret <vscale x 8 x i16> %s2
+}
+
+define <vscale x 8 x i16> @haddu_v8i16(<vscale x 8 x i16> %s0, <vscale x 8 x i16> %s1) {
+; SVE-LABEL: haddu_v8i16:
+; SVE:       // %bb.0: // %entry
+; SVE-NEXT:    lsr z2.h, z1.h, #1
+; SVE-NEXT:    lsr z3.h, z0.h, #1
+; SVE-NEXT:    and z0.d, z0.d, z1.d
+; SVE-NEXT:    add z1.h, z3.h, z2.h
+; SVE-NEXT:    and z0.h, z0.h, #0x1
+; SVE-NEXT:    add z0.h, z1.h, z0.h
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: haddu_v8i16:
+; SVE2:       // %bb.0: // %entry
+; SVE2-NEXT:    ptrue p0.h
+; SVE2-NEXT:    uhadd z0.h, p0/m, z0.h, z1.h
+; SVE2-NEXT:    ret
+entry:
+  %s0s = zext <vscale x 8 x i16> %s0 to <vscale x 8 x i32>
+  %s1s = zext <vscale x 8 x i16> %s1 to <vscale x 8 x i32>
+  %m = add nuw nsw <vscale x 8 x i32> %s0s, %s1s
+  %s = lshr <vscale x 8 x i32> %m, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 1, i32 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
+  %s2 = trunc <vscale x 8 x i32> %s to <vscale x 8 x i16>
+  ret <vscale x 8 x i16> %s2
+}
+
+define <vscale x 4 x i8> @hadds_v4i8(<vscale x 4 x i8> %s0, <vscale x 4 x i8> %s1) {
+; SVE-LABEL: hadds_v4i8:
+; SVE:       // %bb.0: // %entry
+; SVE-NEXT:    ptrue p0.s
+; SVE-NEXT:    sxtb z0.s, p0/m, z0.s
+; SVE-NEXT:    sxtb z1.s, p0/m, z1.s
+; SVE-NEXT:    add z0.s, z0.s, z1.s
+; SVE-NEXT:    asr z0.s, z0.s, #1
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: hadds_v4i8:
+; SVE2:       // %bb.0: // %entry
+; SVE2-NEXT:    ptrue p0.s
+; SVE2-NEXT:    sxtb z0.s, p0/m, z0.s
+; SVE2-NEXT:    sxtb z1.s, p0/m, z1.s
+; SVE2-NEXT:    shadd z0.s, p0/m, z0.s, z1.s
+; SVE2-NEXT:    ret
+entry:
+  %s0s = sext <vscale x 4 x i8> %s0 to <vscale x 4 x i16>
+  %s1s = sext <vscale x 4 x i8> %s1 to <vscale x 4 x i16>
+  %m = add nsw <vscale x 4 x i16> %s0s, %s1s
+  %s = ashr <vscale x 4 x i16> %m, shufflevector (<vscale x 4 x i16> insertelement (<vscale x 4 x i16> poison, i16 1, i32 0), <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer)
+  %s2 = trunc <vscale x 4 x i16> %s to <vscale x 4 x i8>
+  ret <vscale x 4 x i8> %s2
+}
+
+define <vscale x 4 x i8> @hadds_v4i8_lsh(<vscale x 4 x i8> %s0, <vscale x 4 x i8> %s1) {
+; CHECK-LABEL: hadds_v4i8_lsh:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    sxtb z0.s, p0/m, z0.s
+; CHECK-NEXT:    sxtb z1.s, p0/m, z1.s
+; CHECK-NEXT:    add z0.s, z0.s, z1.s
+; CHECK-NEXT:    and z0.s, z0.s, #0xffff
+; CHECK-NEXT:    lsr z0.s, z0.s, #1
+; CHECK-NEXT:    ret
+entry:
+  %s0s = sext <vscale x 4 x i8> %s0 to <vscale x 4 x i16>
+  %s1s = sext <vscale x 4 x i8> %s1 to <vscale x 4 x i16>
+  %m = add nsw <vscale x 4 x i16> %s0s, %s1s
+  %s = lshr <vscale x 4 x i16> %m, shufflevector (<vscale x 4 x i16> insertelement (<vscale x 4 x i16> poison, i16 1, i32 0), <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer)
+  %s2 = trunc <vscale x 4 x i16> %s to <vscale x 4 x i8>
+  ret <vscale x 4 x i8> %s2
+}
+
+define <vscale x 4 x i8> @haddu_v4i8(<vscale x 4 x i8> %s0, <vscale x 4 x i8> %s1) {
+; SVE-LABEL: haddu_v4i8:
+; SVE:       // %bb.0: // %entry
+; SVE-NEXT:    and z0.s, z0.s, #0xff
+; SVE-NEXT:    and z1.s, z1.s, #0xff
+; SVE-NEXT:    add z0.s, z0.s, z1.s
+; SVE-NEXT:    lsr z0.s, z0.s, #1
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: haddu_v4i8:
+; SVE2:       // %bb.0: // %entry
+; SVE2-NEXT:    ptrue p0.s
+; SVE2-NEXT:    and z0.s, z0.s, #0xff
+; SVE2-NEXT:    and z1.s, z1.s, #0xff
+; SVE2-NEXT:    uhadd z0.s, p0/m, z0.s, z1.s
+; SVE2-NEXT:    ret
+entry:
+  %s0s = zext <vscale x 4 x i8> %s0 to <vscale x 4 x i16>
+  %s1s = zext <vscale x 4 x i8> %s1 to <vscale x 4 x i16>
+  %m = add nuw nsw <vscale x 4 x i16> %s0s, %s1s
+  %s = lshr <vscale x 4 x i16> %m, shufflevector (<vscale x 4 x i16> insertelement (<vscale x 4 x i16> poison, i16 1, i32 0), <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer)
+  %s2 = trunc <vscale x 4 x i16> %s to <vscale x 4 x i8>
+  ret <vscale x 4 x i8> %s2
+}
+
+define <vscale x 8 x i8> @hadds_v8i8(<vscale x 8 x i8> %s0, <vscale x 8 x i8> %s1) {
+; SVE-LABEL: hadds_v8i8:
+; SVE:       // %bb.0: // %entry
+; SVE-NEXT:    ptrue p0.h
+; SVE-NEXT:    sxtb z0.h, p0/m, z0.h
+; SVE-NEXT:    sxtb z1.h, p0/m, z1.h
+; SVE-NEXT:    add z0.h, z0.h, z1.h
+; SVE-NEXT:    asr z0.h, z0.h, #1
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: hadds_v8i8:
+; SVE2:       // %bb.0: // %entry
+; SVE2-NEXT:    ptrue p0.h
+; SVE2-NEXT:    sxtb z0.h, p0/m, z0.h
+; SVE2-NEXT:    sxtb z1.h, p0/m, z1.h
+; SVE2-NEXT:    shadd z0.h, p0/m, z0.h, z1.h
+; SVE2-NEXT:    ret
+entry:
+  %s0s = sext <vscale x 8 x i8> %s0 to <vscale x 8 x i16>
+  %s1s = sext <vscale x 8 x i8> %s1 to <vscale x 8 x i16>
+  %m = add nsw <vscale x 8 x i16> %s0s, %s1s
+  %s = ashr <vscale x 8 x i16> %m, shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 1, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)
+  %s2 = trunc <vscale x 8 x i16> %s to <vscale x 8 x i8>
+  ret <vscale x 8 x i8> %s2
+}
+
+define <vscale x 8 x i8> @hadds_v8i8_lsh(<vscale x 8 x i8> %s0, <vscale x 8 x i8> %s1) {
+; CHECK-LABEL: hadds_v8i8_lsh:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    sxtb z0.h, p0/m, z0.h
+; CHECK-NEXT:    sxtb z1.h, p0/m, z1.h
+; CHECK-NEXT:    add z0.h, z0.h, z1.h
+; CHECK-NEXT:    lsr z0.h, z0.h, #1
+; CHECK-NEXT:    ret
+entry:
+  %s0s = sext <vscale x 8 x i8> %s0 to <vscale x 8 x i16>
+  %s1s = sext <vscale x 8 x i8> %s1 to <vscale x 8 x i16>
+  %m = add nsw <vscale x 8 x i16> %s0s, %s1s
+  %s = lshr <vscale x 8 x i16> %m, shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 1, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)
+  %s2 = trunc <vscale x 8 x i16> %s to <vscale x 8 x i8>
+  ret <vscale x 8 x i8> %s2
+}
+
+define <vscale x 8 x i8> @haddu_v8i8(<vscale x 8 x i8> %s0, <vscale x 8 x i8> %s1) {
+; SVE-LABEL: haddu_v8i8:
+; SVE:       // %bb.0: // %entry
+; SVE-NEXT:    and z0.h, z0.h, #0xff
+; SVE-NEXT:    and z1.h, z1.h, #0xff
+; SVE-NEXT:    add z0.h, z0.h, z1.h
+; SVE-NEXT:    lsr z0.h, z0.h, #1
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: haddu_v8i8:
+; SVE2:       // %bb.0: // %entry
+; SVE2-NEXT:    ptrue p0.h
+; SVE2-NEXT:    and z0.h, z0.h, #0xff
+; SVE2-NEXT:    and z1.h, z1.h, #0xff
+; SVE2-NEXT:    uhadd z0.h, p0/m, z0.h, z1.h
+; SVE2-NEXT:    ret
+entry:
+  %s0s = zext <vscale x 8 x i8> %s0 to <vscale x 8 x i16>
+  %s1s = zext <vscale x 8 x i8> %s1 to <vscale x 8 x i16>
+  %m = add nuw nsw <vscale x 8 x i16> %s0s, %s1s
+  %s = lshr <vscale x 8 x i16> %m, shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 1, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)
+  %s2 = trunc <vscale x 8 x i16> %s to <vscale x 8 x i8>
+  ret <vscale x 8 x i8> %s2
+}
+
+define <vscale x 16 x i8> @hadds_v16i8(<vscale x 16 x i8> %s0, <vscale x 16 x i8> %s1) {
+; SVE-LABEL: hadds_v16i8:
+; SVE:       // %bb.0: // %entry
+; SVE-NEXT:    asr z2.b, z1.b, #1
+; SVE-NEXT:    asr z3.b, z0.b, #1
+; SVE-NEXT:    and z0.d, z0.d, z1.d
+; SVE-NEXT:    add z1.b, z3.b, z2.b
+; SVE-NEXT:    and z0.b, z0.b, #0x1
+; SVE-NEXT:    add z0.b, z1.b, z0.b
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: hadds_v16i8:
+; SVE2:       // %bb.0: // %entry
+; SVE2-NEXT:    ptrue p0.b
+; SVE2-NEXT:    shadd z0.b, p0/m, z0.b, z1.b
+; SVE2-NEXT:    ret
+entry:
+  %s0s = sext <vscale x 16 x i8> %s0 to <vscale x 16 x i16>
+  %s1s = sext <vscale x 16 x i8> %s1 to <vscale x 16 x i16>
+  %m = add nsw <vscale x 16 x i16> %s0s, %s1s
+  %s = ashr <vscale x 16 x i16> %m, shufflevector (<vscale x 16 x i16> insertelement (<vscale x 16 x i16> poison, i16 1, i32 0), <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer)
+  %s2 = trunc <vscale x 16 x i16> %s to <vscale x 16 x i8>
+  ret <vscale x 16 x i8> %s2
+}
+
+define <vscale x 16 x i8> @hadds_v16i8_lsh(<vscale x 16 x i8> %s0, <vscale x 16 x i8> %s1) {
+; SVE-LABEL: hadds_v16i8_lsh:
+; SVE:       // %bb.0: // %entry
+; SVE-NEXT:    asr z2.b, z1.b, #1
+; SVE-NEXT:    asr z3.b, z0.b, #1
+; SVE-NEXT:    and z0.d, z0.d, z1.d
+; SVE-NEXT:    add z1.b, z3.b, z2.b
+; SVE-NEXT:    and z0.b, z0.b, #0x1
+; SVE-NEXT:    add z0.b, z1.b, z0.b
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: hadds_v16i8_lsh:
+; SVE2:       // %bb.0: // %entry
+; SVE2-NEXT:    ptrue p0.b
+; SVE2-NEXT:    shadd z0.b, p0/m, z0.b, z1.b
+; SVE2-NEXT:    ret
+entry:
+  %s0s = sext <vscale x 16 x i8> %s0 to <vscale x 16 x i16>
+  %s1s = sext <vscale x 16 x i8> %s1 to <vscale x 16 x i16>
+  %m = add nsw <vscale x 16 x i16> %s0s, %s1s
+  %s = lshr <vscale x 16 x i16> %m, shufflevector (<vscale x 16 x i16> insertelement (<vscale x 16 x i16> poison, i16 1, i32 0), <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer)
+  %s2 = trunc <vscale x 16 x i16> %s to <vscale x 16 x i8>
+  ret <vscale x 16 x i8> %s2
+}
+
+define <vscale x 16 x i8> @haddu_v16i8(<vscale x 16 x i8> %s0, <vscale x 16 x i8> %s1) {
+; SVE-LABEL: haddu_v16i8:
+; SVE:       // %bb.0: // %entry
+; SVE-NEXT:    lsr z2.b, z1.b, #1
+; SVE-NEXT:    lsr z3.b, z0.b, #1
+; SVE-NEXT:    and z0.d, z0.d, z1.d
+; SVE-NEXT:    add z1.b, z3.b, z2.b
+; SVE-NEXT:    and z0.b, z0.b, #0x1
+; SVE-NEXT:    add z0.b, z1.b, z0.b
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: haddu_v16i8:
+; SVE2:       // %bb.0: // %entry
+; SVE2-NEXT:    ptrue p0.b
+; SVE2-NEXT:    uhadd z0.b, p0/m, z0.b, z1.b
+; SVE2-NEXT:    ret
+entry:
+  %s0s = zext <vscale x 16 x i8> %s0 to <vscale x 16 x i16>
+  %s1s = zext <vscale x 16 x i8> %s1 to <vscale x 16 x i16>
+  %m = add nuw nsw <vscale x 16 x i16> %s0s, %s1s
+  %s = lshr <vscale x 16 x i16> %m, shufflevector (<vscale x 16 x i16> insertelement (<vscale x 16 x i16> poison, i16 1, i32 0), <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer)
+  %s2 = trunc <vscale x 16 x i16> %s to <vscale x 16 x i8>
+  ret <vscale x 16 x i8> %s2
+}
+
+define <vscale x 2 x i64> @rhadds_v2i64(<vscale x 2 x i64> %s0, <vscale x 2 x i64> %s1) {
+; SVE-LABEL: rhadds_v2i64:
+; SVE:       // %bb.0: // %entry
+; SVE-NEXT:    asr z2.d, z1.d, #1
+; SVE-NEXT:    asr z3.d, z0.d, #1
+; SVE-NEXT:    orr z0.d, z0.d, z1.d
+; SVE-NEXT:    add z1.d, z3.d, z2.d
+; SVE-NEXT:    and z0.d, z0.d, #0x1
+; SVE-NEXT:    add z0.d, z1.d, z0.d
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: rhadds_v2i64:
+; SVE2:       // %bb.0: // %entry
+; SVE2-NEXT:    ptrue p0.d
+; SVE2-NEXT:    srhadd z0.d, p0/m, z0.d, z1.d
+; SVE2-NEXT:    ret
+entry:
+  %s0s = sext <vscale x 2 x i64> %s0 to <vscale x 2 x i128>
+  %s1s = sext <vscale x 2 x i64> %s1 to <vscale x 2 x i128>
+  %add = add <vscale x 2 x i128> %s0s, shufflevector (<vscale x 2 x i128> insertelement (<vscale x 2 x i128> poison, i128 1, i32 0), <vscale x 2 x i128> poison, <vscale x 2 x i32> zeroinitializer)
+  %add2 = add <vscale x 2 x i128> %add, %s1s
+  %s = ashr <vscale x 2 x i128> %add2, shufflevector (<vscale x 2 x i128> insertelement (<vscale x 2 x i128> poison, i128 1, i32 0), <vscale x 2 x i128> poison, <vscale x 2 x i32> zeroinitializer)
+  %result = trunc <vscale x 2 x i128> %s to <vscale x 2 x i64>
+  ret <vscale x 2 x i64> %result
+}
+
+define <vscale x 2 x i64> @rhadds_v2i64_lsh(<vscale x 2 x i64> %s0, <vscale x 2 x i64> %s1) {
+; SVE-LABEL: rhadds_v2i64_lsh:
+; SVE:       // %bb.0: // %entry
+; SVE-NEXT:    asr z2.d, z1.d, #1
+; SVE-NEXT:    asr z3.d, z0.d, #1
+; SVE-NEXT:    orr z0.d, z0.d, z1.d
+; SVE-NEXT:    add z1.d, z3.d, z2.d
+; SVE-NEXT:    and z0.d, z0.d, #0x1
+; SVE-NEXT:    add z0.d, z1.d, z0.d
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: rhadds_v2i64_lsh:
+; SVE2:       // %bb.0: // %entry
+; SVE2-NEXT:    ptrue p0.d
+; SVE2-NEXT:    srhadd z0.d, p0/m, z0.d, z1.d
+; SVE2-NEXT:    ret
+entry:
+  %s0s = sext <vscale x 2 x i64> %s0 to <vscale x 2 x i128>
+  %s1s = sext <vscale x 2 x i64> %s1 to <vscale x 2 x i128>
+  %add = add <vscale x 2 x i128> %s0s, shufflevector (<vscale x 2 x i128> insertelement (<vscale x 2 x i128> poison, i128 1, i32 0), <vscale x 2 x i128> poison, <vscale x 2 x i32> zeroinitializer)
+  %add2 = add <vscale x 2 x i128> %add, %s1s
+  %s = lshr <vscale x 2 x i128> %add2, shufflevector (<vscale x 2 x i128> insertelement (<vscale x 2 x i128> poison, i128 1, i32 0), <vscale x 2 x i128> poison, <vscale x 2 x i32> zeroinitializer)
+  %result = trunc <vscale x 2 x i128> %s to <vscale x 2 x i64>
+  ret <vscale x 2 x i64> %result
+}
+
+define <vscale x 2 x i64> @rhaddu_v2i64(<vscale x 2 x i64> %s0, <vscale x 2 x i64> %s1) {
+; SVE-LABEL: rhaddu_v2i64:
+; SVE:       // %bb.0: // %entry
+; SVE-NEXT:    lsr z2.d, z1.d, #1
+; SVE-NEXT:    lsr z3.d, z0.d, #1
+; SVE-NEXT:    orr z0.d, z0.d, z1.d
+; SVE-NEXT:    add z1.d, z3.d, z2.d
+; SVE-NEXT:    and z0.d, z0.d, #0x1
+; SVE-NEXT:    add z0.d, z1.d, z0.d
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: rhaddu_v2i64:
+; SVE2:       // %bb.0: // %entry
+; SVE2-NEXT:    ptrue p0.d
+; SVE2-NEXT:    urhadd z0.d, p0/m, z0.d, z1.d
+; SVE2-NEXT:    ret
+entry:
+  %s0s = zext <vscale x 2 x i64> %s0 to <vscale x 2 x i128>
+  %s1s = zext <vscale x 2 x i64> %s1 to <vscale x 2 x i128>
+  %add = add nuw nsw <vscale x 2 x i128> %s0s, shufflevector (<vscale x 2 x i128> insertelement (<vscale x 2 x i128> poison, i128 1, i32 0), <vscale x 2 x i128> poison, <vscale x 2 x i32> zeroinitializer)
+  %add2 = add nuw nsw <vscale x 2 x i128> %add, %s1s
+  %s = lshr <vscale x 2 x i128> %add2, shufflevector (<vscale x 2 x i128> insertelement (<vscale x 2 x i128> poison, i128 1, i32 0), <vscale x 2 x i128> poison, <vscale x 2 x i32> zeroinitializer)
+  %result = trunc <vscale x 2 x i128> %s to <vscale x 2 x i64>
+  ret <vscale x 2 x i64> %result
+}
+
+define <vscale x 2 x i32> @rhadds_v2i32(<vscale x 2 x i32> %s0, <vscale x 2 x i32> %s1) {
+; CHECK-LABEL: rhadds_v2i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    mov z2.d, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    sxtw z0.d, p0/m, z0.d
+; CHECK-NEXT:    sxtw z1.d, p0/m, z1.d
+; CHECK-NEXT:    eor z0.d, z0.d, z2.d
+; CHECK-NEXT:    sub z0.d, z1.d, z0.d
+; CHECK-NEXT:    asr z0.d, z0.d, #1
+; CHECK-NEXT:    ret
+entry:
+  %s0s = sext <vscale x 2 x i32> %s0 to <vscale x 2 x i64>
+  %s1s = sext <vscale x 2 x i32> %s1 to <vscale x 2 x i64>
+  %add = add <vscale x 2 x i64> %s0s, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+  %add2 = add <vscale x 2 x i64> %add, %s1s
+  %s = ashr <vscale x 2 x i64> %add2, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+  %result = trunc <vscale x 2 x i64> %s to <vscale x 2 x i32>
+  ret <vscale x 2 x i32> %result
+}
+
+define <vscale x 2 x i32> @rhadds_v2i32_lsh(<vscale x 2 x i32> %s0, <vscale x 2 x i32> %s1) {
+; CHECK-LABEL: rhadds_v2i32_lsh:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    mov z2.d, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    sxtw z0.d, p0/m, z0.d
+; CHECK-NEXT:    sxtw z1.d, p0/m, z1.d
+; CHECK-NEXT:    eor z0.d, z0.d, z2.d
+; CHECK-NEXT:    sub z0.d, z1.d, z0.d
+; CHECK-NEXT:    lsr z0.d, z0.d, #1
+; CHECK-NEXT:    ret
+entry:
+  %s0s = sext <vscale x 2 x i32> %s0 to <vscale x 2 x i64>
+  %s1s = sext <vscale x 2 x i32> %s1 to <vscale x 2 x i64>
+  %add = add <vscale x 2 x i64> %s0s, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+  %add2 = add <vscale x 2 x i64> %add, %s1s
+  %s = lshr <vscale x 2 x i64> %add2, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+  %result = trunc <vscale x 2 x i64> %s to <vscale x 2 x i32>
+  ret <vscale x 2 x i32> %result
+}
+
+define <vscale x 2 x i32> @rhaddu_v2i32(<vscale x 2 x i32> %s0, <vscale x 2 x i32> %s1) {
+; SVE-LABEL: rhaddu_v2i32:
+; SVE:       // %bb.0: // %entry
+; SVE-NEXT:    mov z2.d, #-1 // =0xffffffffffffffff
+; SVE-NEXT:    and z0.d, z0.d, #0xffffffff
+; SVE-NEXT:    and z1.d, z1.d, #0xffffffff
+; SVE-NEXT:    eor z0.d, z0.d, z2.d
+; SVE-NEXT:    sub z0.d, z1.d, z0.d
+; SVE-NEXT:    lsr z0.d, z0.d, #1
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: rhaddu_v2i32:
+; SVE2:       // %bb.0: // %entry
+; SVE2-NEXT:    ptrue p0.d
+; SVE2-NEXT:    and z0.d, z0.d, #0xffffffff
+; SVE2-NEXT:    and z1.d, z1.d, #0xffffffff
+; SVE2-NEXT:    urhadd z0.d, p0/m, z0.d, z1.d
+; SVE2-NEXT:    ret
+entry:
+  %s0s = zext <vscale x 2 x i32> %s0 to <vscale x 2 x i64>
+  %s1s = zext <vscale x 2 x i32> %s1 to <vscale x 2 x i64>
+  %add = add nuw nsw <vscale x 2 x i64> %s0s, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+  %add2 = add nuw nsw <vscale x 2 x i64> %add, %s1s
+  %s = lshr <vscale x 2 x i64> %add2, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+  %result = trunc <vscale x 2 x i64> %s to <vscale x 2 x i32>
+  ret <vscale x 2 x i32> %result
+}
+
+define <vscale x 4 x i32> @rhadds_v4i32(<vscale x 4 x i32> %s0, <vscale x 4 x i32> %s1) {
+; SVE-LABEL: rhadds_v4i32:
+; SVE:       // %bb.0: // %entry
+; SVE-NEXT:    asr z2.s, z1.s, #1
+; SVE-NEXT:    asr z3.s, z0.s, #1
+; SVE-NEXT:    orr z0.d, z0.d, z1.d
+; SVE-NEXT:    add z1.s, z3.s, z2.s
+; SVE-NEXT:    and z0.s, z0.s, #0x1
+; SVE-NEXT:    add z0.s, z1.s, z0.s
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: rhadds_v4i32:
+; SVE2:       // %bb.0: // %entry
+; SVE2-NEXT:    ptrue p0.s
+; SVE2-NEXT:    srhadd z0.s, p0/m, z0.s, z1.s
+; SVE2-NEXT:    ret
+entry:
+  %s0s = sext <vscale x 4 x i32> %s0 to <vscale x 4 x i64>
+  %s1s = sext <vscale x 4 x i32> %s1 to <vscale x 4 x i64>
+  %add = add <vscale x 4 x i64> %s0s, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
+  %add2 = add <vscale x 4 x i64> %add, %s1s
+  %s = ashr <vscale x 4 x i64> %add2, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
+  %result = trunc <vscale x 4 x i64> %s to <vscale x 4 x i32>
+  ret <vscale x 4 x i32> %result
+}
+
+define <vscale x 4 x i32> @rhadds_v4i32_lsh(<vscale x 4 x i32> %s0, <vscale x 4 x i32> %s1) {
+; SVE-LABEL: rhadds_v4i32_lsh:
+; SVE:       // %bb.0: // %entry
+; SVE-NEXT:    asr z2.s, z1.s, #1
+; SVE-NEXT:    asr z3.s, z0.s, #1
+; SVE-NEXT:    orr z0.d, z0.d, z1.d
+; SVE-NEXT:    add z1.s, z3.s, z2.s
+; SVE-NEXT:    and z0.s, z0.s, #0x1
+; SVE-NEXT:    add z0.s, z1.s, z0.s
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: rhadds_v4i32_lsh:
+; SVE2:       // %bb.0: // %entry
+; SVE2-NEXT:    ptrue p0.s
+; SVE2-NEXT:    srhadd z0.s, p0/m, z0.s, z1.s
+; SVE2-NEXT:    ret
+entry:
+  %s0s = sext <vscale x 4 x i32> %s0 to <vscale x 4 x i64>
+  %s1s = sext <vscale x 4 x i32> %s1 to <vscale x 4 x i64>
+  %add = add <vscale x 4 x i64> %s0s, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
+  %add2 = add <vscale x 4 x i64> %add, %s1s
+  %s = lshr <vscale x 4 x i64> %add2, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
+  %result = trunc <vscale x 4 x i64> %s to <vscale x 4 x i32>
+  ret <vscale x 4 x i32> %result
+}
+
+define <vscale x 4 x i32> @rhaddu_v4i32(<vscale x 4 x i32> %s0, <vscale x 4 x i32> %s1) {
+; SVE-LABEL: rhaddu_v4i32:
+; SVE:       // %bb.0: // %entry
+; SVE-NEXT:    lsr z2.s, z1.s, #1
+; SVE-NEXT:    lsr z3.s, z0.s, #1
+; SVE-NEXT:    orr z0.d, z0.d, z1.d
+; SVE-NEXT:    add z1.s, z3.s, z2.s
+; SVE-NEXT:    and z0.s, z0.s, #0x1
+; SVE-NEXT:    add z0.s, z1.s, z0.s
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: rhaddu_v4i32:
+; SVE2:       // %bb.0: // %entry
+; SVE2-NEXT:    ptrue p0.s
+; SVE2-NEXT:    urhadd z0.s, p0/m, z0.s, z1.s
+; SVE2-NEXT:    ret
+entry:
+  %s0s = zext <vscale x 4 x i32> %s0 to <vscale x 4 x i64>
+  %s1s = zext <vscale x 4 x i32> %s1 to <vscale x 4 x i64>
+  %add = add nuw nsw <vscale x 4 x i64> %s0s, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
+  %add2 = add nuw nsw <vscale x 4 x i64> %add, %s1s
+  %s = lshr <vscale x 4 x i64> %add2, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
+  %result = trunc <vscale x 4 x i64> %s to <vscale x 4 x i32>
+  ret <vscale x 4 x i32> %result
+}
+
+define <vscale x 2 x i16> @rhadds_v2i16(<vscale x 2 x i16> %s0, <vscale x 2 x i16> %s1) {
+; CHECK-LABEL: rhadds_v2i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    mov z2.d, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    sxth z0.d, p0/m, z0.d
+; CHECK-NEXT:    sxth z1.d, p0/m, z1.d
+; CHECK-NEXT:    eor z0.d, z0.d, z2.d
+; CHECK-NEXT:    sub z0.d, z1.d, z0.d
+; CHECK-NEXT:    asr z0.d, z0.d, #1
+; CHECK-NEXT:    ret
+entry:
+  %s0s = sext <vscale x 2 x i16> %s0 to <vscale x 2 x i32>
+  %s1s = sext <vscale x 2 x i16> %s1 to <vscale x 2 x i32>
+  %add = add <vscale x 2 x i32> %s0s, shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 1, i32 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer)
+  %add2 = add <vscale x 2 x i32> %add, %s1s
+  %s = ashr <vscale x 2 x i32> %add2, shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 1, i32 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer)
+  %result = trunc <vscale x 2 x i32> %s to <vscale x 2 x i16>
+  ret <vscale x 2 x i16> %result
+}
+
+define <vscale x 2 x i16> @rhadds_v2i16_lsh(<vscale x 2 x i16> %s0, <vscale x 2 x i16> %s1) {
+; CHECK-LABEL: rhadds_v2i16_lsh:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    mov z2.d, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    sxth z0.d, p0/m, z0.d
+; CHECK-NEXT:    sxth z1.d, p0/m, z1.d
+; CHECK-NEXT:    eor z0.d, z0.d, z2.d
+; CHECK-NEXT:    sub z0.d, z1.d, z0.d
+; CHECK-NEXT:    and z0.d, z0.d, #0xffffffff
+; CHECK-NEXT:    lsr z0.d, z0.d, #1
+; CHECK-NEXT:    ret
+entry:
+  %s0s = sext <vscale x 2 x i16> %s0 to <vscale x 2 x i32>
+  %s1s = sext <vscale x 2 x i16> %s1 to <vscale x 2 x i32>
+  %add = add <vscale x 2 x i32> %s0s, shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 1, i32 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer)
+  %add2 = add <vscale x 2 x i32> %add, %s1s
+  %s = lshr <vscale x 2 x i32> %add2, shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 1, i32 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer)
+  %result = trunc <vscale x 2 x i32> %s to <vscale x 2 x i16>
+  ret <vscale x 2 x i16> %result
+}
+
+define <vscale x 2 x i16> @rhaddu_v2i16(<vscale x 2 x i16> %s0, <vscale x 2 x i16> %s1) {
+; CHECK-LABEL: rhaddu_v2i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov z2.d, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    and z0.d, z0.d, #0xffff
+; CHECK-NEXT:    and z1.d, z1.d, #0xffff
+; CHECK-NEXT:    eor z0.d, z0.d, z2.d
+; CHECK-NEXT:    sub z0.d, z1.d, z0.d
+; CHECK-NEXT:    lsr z0.d, z0.d, #1
+; CHECK-NEXT:    ret
+entry:
+  %s0s = zext <vscale x 2 x i16> %s0 to <vscale x 2 x i32>
+  %s1s = zext <vscale x 2 x i16> %s1 to <vscale x 2 x i32>
+  %add = add nuw nsw <vscale x 2 x i32> %s0s, shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 1, i32 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer)
+  %add2 = add nuw nsw <vscale x 2 x i32> %add, %s1s
+  %s = lshr <vscale x 2 x i32> %add2, shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 1, i32 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer)
+  %result = trunc <vscale x 2 x i32> %s to <vscale x 2 x i16>
+  ret <vscale x 2 x i16> %result
+}
+
+define <vscale x 4 x i16> @rhadds_v4i16(<vscale x 4 x i16> %s0, <vscale x 4 x i16> %s1) {
+; CHECK-LABEL: rhadds_v4i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    mov z2.s, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    sxth z0.s, p0/m, z0.s
+; CHECK-NEXT:    sxth z1.s, p0/m, z1.s
+; CHECK-NEXT:    eor z0.d, z0.d, z2.d
+; CHECK-NEXT:    sub z0.s, z1.s, z0.s
+; CHECK-NEXT:    asr z0.s, z0.s, #1
+; CHECK-NEXT:    ret
+entry:
+  %s0s = sext <vscale x 4 x i16> %s0 to <vscale x 4 x i32>
+  %s1s = sext <vscale x 4 x i16> %s1 to <vscale x 4 x i32>
+  %add = add <vscale x 4 x i32> %s0s, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+  %add2 = add <vscale x 4 x i32> %add, %s1s
+  %s = ashr <vscale x 4 x i32> %add2, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+  %result = trunc <vscale x 4 x i32> %s to <vscale x 4 x i16>
+  ret <vscale x 4 x i16> %result
+}
+
+define <vscale x 4 x i16> @rhadds_v4i16_lsh(<vscale x 4 x i16> %s0, <vscale x 4 x i16> %s1) {
+; CHECK-LABEL: rhadds_v4i16_lsh:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    mov z2.s, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    sxth z0.s, p0/m, z0.s
+; CHECK-NEXT:    sxth z1.s, p0/m, z1.s
+; CHECK-NEXT:    eor z0.d, z0.d, z2.d
+; CHECK-NEXT:    sub z0.s, z1.s, z0.s
+; CHECK-NEXT:    lsr z0.s, z0.s, #1
+; CHECK-NEXT:    ret
+entry:
+  %s0s = sext <vscale x 4 x i16> %s0 to <vscale x 4 x i32>
+  %s1s = sext <vscale x 4 x i16> %s1 to <vscale x 4 x i32>
+  %add = add <vscale x 4 x i32> %s0s, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+  %add2 = add <vscale x 4 x i32> %add, %s1s
+  %s = lshr <vscale x 4 x i32> %add2, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+  %result = trunc <vscale x 4 x i32> %s to <vscale x 4 x i16>
+  ret <vscale x 4 x i16> %result
+}
+
+define <vscale x 4 x i16> @rhaddu_v4i16(<vscale x 4 x i16> %s0, <vscale x 4 x i16> %s1) {
+; SVE-LABEL: rhaddu_v4i16:
+; SVE:       // %bb.0: // %entry
+; SVE-NEXT:    mov z2.s, #-1 // =0xffffffffffffffff
+; SVE-NEXT:    and z0.s, z0.s, #0xffff
+; SVE-NEXT:    and z1.s, z1.s, #0xffff
+; SVE-NEXT:    eor z0.d, z0.d, z2.d
+; SVE-NEXT:    sub z0.s, z1.s, z0.s
+; SVE-NEXT:    lsr z0.s, z0.s, #1
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: rhaddu_v4i16:
+; SVE2:       // %bb.0: // %entry
+; SVE2-NEXT:    ptrue p0.s
+; SVE2-NEXT:    and z0.s, z0.s, #0xffff
+; SVE2-NEXT:    and z1.s, z1.s, #0xffff
+; SVE2-NEXT:    urhadd z0.s, p0/m, z0.s, z1.s
+; SVE2-NEXT:    ret
+entry:
+  %s0s = zext <vscale x 4 x i16> %s0 to <vscale x 4 x i32>
+  %s1s = zext <vscale x 4 x i16> %s1 to <vscale x 4 x i32>
+  %add = add nuw nsw <vscale x 4 x i32> %s0s, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+  %add2 = add nuw nsw <vscale x 4 x i32> %add, %s1s
+  %s = lshr <vscale x 4 x i32> %add2, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+  %result = trunc <vscale x 4 x i32> %s to <vscale x 4 x i16>
+  ret <vscale x 4 x i16> %result
+}
+
+define <vscale x 8 x i16> @rhadds_v8i16(<vscale x 8 x i16> %s0, <vscale x 8 x i16> %s1) {
+; SVE-LABEL: rhadds_v8i16:
+; SVE:       // %bb.0: // %entry
+; SVE-NEXT:    asr z2.h, z1.h, #1
+; SVE-NEXT:    asr z3.h, z0.h, #1
+; SVE-NEXT:    orr z0.d, z0.d, z1.d
+; SVE-NEXT:    add z1.h, z3.h, z2.h
+; SVE-NEXT:    and z0.h, z0.h, #0x1
+; SVE-NEXT:    add z0.h, z1.h, z0.h
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: rhadds_v8i16:
+; SVE2:       // %bb.0: // %entry
+; SVE2-NEXT:    ptrue p0.h
+; SVE2-NEXT:    srhadd z0.h, p0/m, z0.h, z1.h
+; SVE2-NEXT:    ret
+entry:
+  %s0s = sext <vscale x 8 x i16> %s0 to <vscale x 8 x i32>
+  %s1s = sext <vscale x 8 x i16> %s1 to <vscale x 8 x i32>
+  %add = add <vscale x 8 x i32> %s0s, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 1, i32 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
+  %add2 = add <vscale x 8 x i32> %add, %s1s
+  %s = ashr <vscale x 8 x i32> %add2, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 1, i32 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
+  %result = trunc <vscale x 8 x i32> %s to <vscale x 8 x i16>
+  ret <vscale x 8 x i16> %result
+}
+
+define <vscale x 8 x i16> @rhadds_v8i16_lsh(<vscale x 8 x i16> %s0, <vscale x 8 x i16> %s1) {
+; SVE-LABEL: rhadds_v8i16_lsh:
+; SVE:       // %bb.0: // %entry
+; SVE-NEXT:    asr z2.h, z1.h, #1
+; SVE-NEXT:    asr z3.h, z0.h, #1
+; SVE-NEXT:    orr z0.d, z0.d, z1.d
+; SVE-NEXT:    add z1.h, z3.h, z2.h
+; SVE-NEXT:    and z0.h, z0.h, #0x1
+; SVE-NEXT:    add z0.h, z1.h, z0.h
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: rhadds_v8i16_lsh:
+; SVE2:       // %bb.0: // %entry
+; SVE2-NEXT:    ptrue p0.h
+; SVE2-NEXT:    srhadd z0.h, p0/m, z0.h, z1.h
+; SVE2-NEXT:    ret
+entry:
+  %s0s = sext <vscale x 8 x i16> %s0 to <vscale x 8 x i32>
+  %s1s = sext <vscale x 8 x i16> %s1 to <vscale x 8 x i32>
+  %add = add <vscale x 8 x i32> %s0s, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 1, i32 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
+  %add2 = add <vscale x 8 x i32> %add, %s1s
+  %s = lshr <vscale x 8 x i32> %add2, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 1, i32 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
+  %result = trunc <vscale x 8 x i32> %s to <vscale x 8 x i16>
+  ret <vscale x 8 x i16> %result
+}
+
+define <vscale x 8 x i16> @rhaddu_v8i16(<vscale x 8 x i16> %s0, <vscale x 8 x i16> %s1) {
+; SVE-LABEL: rhaddu_v8i16:
+; SVE:       // %bb.0: // %entry
+; SVE-NEXT:    lsr z2.h, z1.h, #1
+; SVE-NEXT:    lsr z3.h, z0.h, #1
+; SVE-NEXT:    orr z0.d, z0.d, z1.d
+; SVE-NEXT:    add z1.h, z3.h, z2.h
+; SVE-NEXT:    and z0.h, z0.h, #0x1
+; SVE-NEXT:    add z0.h, z1.h, z0.h
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: rhaddu_v8i16:
+; SVE2:       // %bb.0: // %entry
+; SVE2-NEXT:    ptrue p0.h
+; SVE2-NEXT:    urhadd z0.h, p0/m, z0.h, z1.h
+; SVE2-NEXT:    ret
+entry:
+  %s0s = zext <vscale x 8 x i16> %s0 to <vscale x 8 x i32>
+  %s1s = zext <vscale x 8 x i16> %s1 to <vscale x 8 x i32>
+  %add = add nuw nsw <vscale x 8 x i32> %s0s, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 1, i32 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
+  %add2 = add nuw nsw <vscale x 8 x i32> %add, %s1s
+  %s = lshr <vscale x 8 x i32> %add2, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 1, i32 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
+  %result = trunc <vscale x 8 x i32> %s to <vscale x 8 x i16>
+  ret <vscale x 8 x i16> %result
+}
+
+define <vscale x 4 x i8> @rhadds_v4i8(<vscale x 4 x i8> %s0, <vscale x 4 x i8> %s1) {
+; CHECK-LABEL: rhadds_v4i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    mov z2.s, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    sxtb z0.s, p0/m, z0.s
+; CHECK-NEXT:    sxtb z1.s, p0/m, z1.s
+; CHECK-NEXT:    eor z0.d, z0.d, z2.d
+; CHECK-NEXT:    sub z0.s, z1.s, z0.s
+; CHECK-NEXT:    asr z0.s, z0.s, #1
+; CHECK-NEXT:    ret
+entry:
+  %s0s = sext <vscale x 4 x i8> %s0 to <vscale x 4 x i16>
+  %s1s = sext <vscale x 4 x i8> %s1 to <vscale x 4 x i16>
+  %add = add <vscale x 4 x i16> %s0s, shufflevector (<vscale x 4 x i16> insertelement (<vscale x 4 x i16> poison, i16 1, i32 0), <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer)
+  %add2 = add <vscale x 4 x i16> %add, %s1s
+  %s = ashr <vscale x 4 x i16> %add2, shufflevector (<vscale x 4 x i16> insertelement (<vscale x 4 x i16> poison, i16 1, i32 0), <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer)
+  %result = trunc <vscale x 4 x i16> %s to <vscale x 4 x i8>
+  ret <vscale x 4 x i8> %result
+}
+
+define <vscale x 4 x i8> @rhadds_v4i8_lsh(<vscale x 4 x i8> %s0, <vscale x 4 x i8> %s1) {
+; CHECK-LABEL: rhadds_v4i8_lsh:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    mov z2.s, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    sxtb z0.s, p0/m, z0.s
+; CHECK-NEXT:    sxtb z1.s, p0/m, z1.s
+; CHECK-NEXT:    eor z0.d, z0.d, z2.d
+; CHECK-NEXT:    sub z0.s, z1.s, z0.s
+; CHECK-NEXT:    and z0.s, z0.s, #0xffff
+; CHECK-NEXT:    lsr z0.s, z0.s, #1
+; CHECK-NEXT:    ret
+entry:
+  %s0s = sext <vscale x 4 x i8> %s0 to <vscale x 4 x i16>
+  %s1s = sext <vscale x 4 x i8> %s1 to <vscale x 4 x i16>
+  %add = add <vscale x 4 x i16> %s0s, shufflevector (<vscale x 4 x i16> insertelement (<vscale x 4 x i16> poison, i16 1, i32 0), <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer)
+  %add2 = add <vscale x 4 x i16> %add, %s1s
+  %s = lshr <vscale x 4 x i16> %add2, shufflevector (<vscale x 4 x i16> insertelement (<vscale x 4 x i16> poison, i16 1, i32 0), <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer)
+  %result = trunc <vscale x 4 x i16> %s to <vscale x 4 x i8>
+  ret <vscale x 4 x i8> %result
+}
+
+define <vscale x 4 x i8> @rhaddu_v4i8(<vscale x 4 x i8> %s0, <vscale x 4 x i8> %s1) {
+; CHECK-LABEL: rhaddu_v4i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov z2.s, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    and z0.s, z0.s, #0xff
+; CHECK-NEXT:    and z1.s, z1.s, #0xff
+; CHECK-NEXT:    eor z0.d, z0.d, z2.d
+; CHECK-NEXT:    sub z0.s, z1.s, z0.s
+; CHECK-NEXT:    lsr z0.s, z0.s, #1
+; CHECK-NEXT:    ret
+entry:
+  %s0s = zext <vscale x 4 x i8> %s0 to <vscale x 4 x i16>
+  %s1s = zext <vscale x 4 x i8> %s1 to <vscale x 4 x i16>
+  %add = add nuw nsw <vscale x 4 x i16> %s0s, shufflevector (<vscale x 4 x i16> insertelement (<vscale x 4 x i16> poison, i16 1, i32 0), <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer)
+  %add2 = add nuw nsw <vscale x 4 x i16> %add, %s1s
+  %s = lshr <vscale x 4 x i16> %add2, shufflevector (<vscale x 4 x i16> insertelement (<vscale x 4 x i16> poison, i16 1, i32 0), <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer)
+  %result = trunc <vscale x 4 x i16> %s to <vscale x 4 x i8>
+  ret <vscale x 4 x i8> %result
+}
+
+define <vscale x 8 x i8> @rhadds_v8i8(<vscale x 8 x i8> %s0, <vscale x 8 x i8> %s1) {
+; CHECK-LABEL: rhadds_v8i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    mov z2.h, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    sxtb z0.h, p0/m, z0.h
+; CHECK-NEXT:    sxtb z1.h, p0/m, z1.h
+; CHECK-NEXT:    eor z0.d, z0.d, z2.d
+; CHECK-NEXT:    sub z0.h, z1.h, z0.h
+; CHECK-NEXT:    asr z0.h, z0.h, #1
+; CHECK-NEXT:    ret
+entry:
+  %s0s = sext <vscale x 8 x i8> %s0 to <vscale x 8 x i16>
+  %s1s = sext <vscale x 8 x i8> %s1 to <vscale x 8 x i16>
+  %add = add <vscale x 8 x i16> %s0s, shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 1, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)
+  %add2 = add <vscale x 8 x i16> %add, %s1s
+  %s = ashr <vscale x 8 x i16> %add2, shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 1, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)
+  %result = trunc <vscale x 8 x i16> %s to <vscale x 8 x i8>
+  ret <vscale x 8 x i8> %result
+}
+
+define <vscale x 8 x i8> @rhadds_v8i8_lsh(<vscale x 8 x i8> %s0, <vscale x 8 x i8> %s1) {
+; CHECK-LABEL: rhadds_v8i8_lsh:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    mov z2.h, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    sxtb z0.h, p0/m, z0.h
+; CHECK-NEXT:    sxtb z1.h, p0/m, z1.h
+; CHECK-NEXT:    eor z0.d, z0.d, z2.d
+; CHECK-NEXT:    sub z0.h, z1.h, z0.h
+; CHECK-NEXT:    lsr z0.h, z0.h, #1
+; CHECK-NEXT:    ret
+entry:
+  %s0s = sext <vscale x 8 x i8> %s0 to <vscale x 8 x i16>
+  %s1s = sext <vscale x 8 x i8> %s1 to <vscale x 8 x i16>
+  %add = add <vscale x 8 x i16> %s0s, shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 1, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)
+  %add2 = add <vscale x 8 x i16> %add, %s1s
+  %s = lshr <vscale x 8 x i16> %add2, shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 1, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)
+  %result = trunc <vscale x 8 x i16> %s to <vscale x 8 x i8>
+  ret <vscale x 8 x i8> %result
+}
+
+define <vscale x 8 x i8> @rhaddu_v8i8(<vscale x 8 x i8> %s0, <vscale x 8 x i8> %s1) {
+; SVE-LABEL: rhaddu_v8i8:
+; SVE:       // %bb.0: // %entry
+; SVE-NEXT:    mov z2.h, #-1 // =0xffffffffffffffff
+; SVE-NEXT:    and z0.h, z0.h, #0xff
+; SVE-NEXT:    and z1.h, z1.h, #0xff
+; SVE-NEXT:    eor z0.d, z0.d, z2.d
+; SVE-NEXT:    sub z0.h, z1.h, z0.h
+; SVE-NEXT:    lsr z0.h, z0.h, #1
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: rhaddu_v8i8:
+; SVE2:       // %bb.0: // %entry
+; SVE2-NEXT:    ptrue p0.h
+; SVE2-NEXT:    and z0.h, z0.h, #0xff
+; SVE2-NEXT:    and z1.h, z1.h, #0xff
+; SVE2-NEXT:    urhadd z0.h, p0/m, z0.h, z1.h
+; SVE2-NEXT:    ret
+entry:
+  %s0s = zext <vscale x 8 x i8> %s0 to <vscale x 8 x i16>
+  %s1s = zext <vscale x 8 x i8> %s1 to <vscale x 8 x i16>
+  %add = add nuw nsw <vscale x 8 x i16> %s0s, shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 1, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)
+  %add2 = add nuw nsw <vscale x 8 x i16> %add, %s1s
+  %s = lshr <vscale x 8 x i16> %add2, shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 1, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)
+  %result = trunc <vscale x 8 x i16> %s to <vscale x 8 x i8>
+  ret <vscale x 8 x i8> %result
+}
+
+define <vscale x 16 x i8> @rhadds_v16i8(<vscale x 16 x i8> %s0, <vscale x 16 x i8> %s1) {
+; SVE-LABEL: rhadds_v16i8:
+; SVE:       // %bb.0: // %entry
+; SVE-NEXT:    asr z2.b, z1.b, #1
+; SVE-NEXT:    asr z3.b, z0.b, #1
+; SVE-NEXT:    orr z0.d, z0.d, z1.d
+; SVE-NEXT:    add z1.b, z3.b, z2.b
+; SVE-NEXT:    and z0.b, z0.b, #0x1
+; SVE-NEXT:    add z0.b, z1.b, z0.b
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: rhadds_v16i8:
+; SVE2:       // %bb.0: // %entry
+; SVE2-NEXT:    ptrue p0.b
+; SVE2-NEXT:    srhadd z0.b, p0/m, z0.b, z1.b
+; SVE2-NEXT:    ret
+entry:
+  %s0s = sext <vscale x 16 x i8> %s0 to <vscale x 16 x i16>
+  %s1s = sext <vscale x 16 x i8> %s1 to <vscale x 16 x i16>
+  %add = add <vscale x 16 x i16> %s0s, shufflevector (<vscale x 16 x i16> insertelement (<vscale x 16 x i16> poison, i16 1, i32 0), <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer)
+  %add2 = add <vscale x 16 x i16> %add, %s1s
+  %s = ashr <vscale x 16 x i16> %add2, shufflevector (<vscale x 16 x i16> insertelement (<vscale x 16 x i16> poison, i16 1, i32 0), <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer)
+  %result = trunc <vscale x 16 x i16> %s to <vscale x 16 x i8>
+  ret <vscale x 16 x i8> %result
+}
+
+define <vscale x 16 x i8> @rhadds_v16i8_lsh(<vscale x 16 x i8> %s0, <vscale x 16 x i8> %s1) {
+; SVE-LABEL: rhadds_v16i8_lsh:
+; SVE:       // %bb.0: // %entry
+; SVE-NEXT:    asr z2.b, z1.b, #1
+; SVE-NEXT:    asr z3.b, z0.b, #1
+; SVE-NEXT:    orr z0.d, z0.d, z1.d
+; SVE-NEXT:    add z1.b, z3.b, z2.b
+; SVE-NEXT:    and z0.b, z0.b, #0x1
+; SVE-NEXT:    add z0.b, z1.b, z0.b
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: rhadds_v16i8_lsh:
+; SVE2:       // %bb.0: // %entry
+; SVE2-NEXT:    ptrue p0.b
+; SVE2-NEXT:    srhadd z0.b, p0/m, z0.b, z1.b
+; SVE2-NEXT:    ret
+entry:
+  %s0s = sext <vscale x 16 x i8> %s0 to <vscale x 16 x i16>
+  %s1s = sext <vscale x 16 x i8> %s1 to <vscale x 16 x i16>
+  %add = add <vscale x 16 x i16> %s0s, shufflevector (<vscale x 16 x i16> insertelement (<vscale x 16 x i16> poison, i16 1, i32 0), <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer)
+  %add2 = add <vscale x 16 x i16> %add, %s1s
+  %s = lshr <vscale x 16 x i16> %add2, shufflevector (<vscale x 16 x i16> insertelement (<vscale x 16 x i16> poison, i16 1, i32 0), <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer)
+  %result = trunc <vscale x 16 x i16> %s to <vscale x 16 x i8>
+  ret <vscale x 16 x i8> %result
+}
+
+define <vscale x 16 x i8> @rhaddu_v16i8(<vscale x 16 x i8> %s0, <vscale x 16 x i8> %s1) {
+; SVE-LABEL: rhaddu_v16i8:
+; SVE:       // %bb.0: // %entry
+; SVE-NEXT:    lsr z2.b, z1.b, #1
+; SVE-NEXT:    lsr z3.b, z0.b, #1
+; SVE-NEXT:    orr z0.d, z0.d, z1.d
+; SVE-NEXT:    add z1.b, z3.b, z2.b
+; SVE-NEXT:    and z0.b, z0.b, #0x1
+; SVE-NEXT:    add z0.b, z1.b, z0.b
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: rhaddu_v16i8:
+; SVE2:       // %bb.0: // %entry
+; SVE2-NEXT:    ptrue p0.b
+; SVE2-NEXT:    urhadd z0.b, p0/m, z0.b, z1.b
+; SVE2-NEXT:    ret
+entry:
+  %s0s = zext <vscale x 16 x i8> %s0 to <vscale x 16 x i16>
+  %s1s = zext <vscale x 16 x i8> %s1 to <vscale x 16 x i16>
+  %add = add nuw nsw <vscale x 16 x i16> %s0s, shufflevector (<vscale x 16 x i16> insertelement (<vscale x 16 x i16> poison, i16 1, i32 0), <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer)
+  %add2 = add nuw nsw <vscale x 16 x i16> %add, %s1s
+  %s = lshr <vscale x 16 x i16> %add2, shufflevector (<vscale x 16 x i16> insertelement (<vscale x 16 x i16> poison, i16 1, i32 0), <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer)
+  %result = trunc <vscale x 16 x i16> %s to <vscale x 16 x i8>
+  ret <vscale x 16 x i8> %result
+}

diff  --git a/llvm/test/CodeGen/AArch64/sve2-hadd.ll b/llvm/test/CodeGen/AArch64/sve2-hadd.ll
deleted file mode 100644
index 9363412ff06ba..0000000000000
--- a/llvm/test/CodeGen/AArch64/sve2-hadd.ll
+++ /dev/null
@@ -1,627 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple aarch64-none-eabi -mattr=+sve2 -o - | FileCheck %s
-
-define <vscale x 2 x i64> @hadds_v2i64(<vscale x 2 x i64> %s0, <vscale x 2 x i64> %s1) {
-; CHECK-LABEL: hadds_v2i64:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    shadd z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    ret
-entry:
-  %s0s = sext <vscale x 2 x i64> %s0 to <vscale x 2 x i128>
-  %s1s = sext <vscale x 2 x i64> %s1 to <vscale x 2 x i128>
-  %m = add nsw <vscale x 2 x i128> %s0s, %s1s
-  %s = lshr <vscale x 2 x i128> %m, shufflevector (<vscale x 2 x i128> insertelement (<vscale x 2 x i128> poison, i128 1, i32 0), <vscale x 2 x i128> poison, <vscale x 2 x i32> zeroinitializer)
-  %s2 = trunc <vscale x 2 x i128> %s to <vscale x 2 x i64>
-  ret <vscale x 2 x i64> %s2
-}
-
-define <vscale x 2 x i64> @haddu_v2i64(<vscale x 2 x i64> %s0, <vscale x 2 x i64> %s1) {
-; CHECK-LABEL: haddu_v2i64:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    uhadd z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    ret
-entry:
-  %s0s = zext <vscale x 2 x i64> %s0 to <vscale x 2 x i128>
-  %s1s = zext <vscale x 2 x i64> %s1 to <vscale x 2 x i128>
-  %m = add nuw nsw <vscale x 2 x i128> %s0s, %s1s
-  %s = lshr <vscale x 2 x i128> %m, shufflevector (<vscale x 2 x i128> insertelement (<vscale x 2 x i128> poison, i128 1, i32 0), <vscale x 2 x i128> poison, <vscale x 2 x i32> zeroinitializer)
-  %s2 = trunc <vscale x 2 x i128> %s to <vscale x 2 x i64>
-  ret <vscale x 2 x i64> %s2
-}
-
-define <vscale x 2 x i32> @hadds_v2i32(<vscale x 2 x i32> %s0, <vscale x 2 x i32> %s1) {
-; CHECK-LABEL: hadds_v2i32:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    sxtw z0.d, p0/m, z0.d
-; CHECK-NEXT:    adr z0.d, [z0.d, z1.d, sxtw]
-; CHECK-NEXT:    lsr z0.d, z0.d, #1
-; CHECK-NEXT:    ret
-entry:
-  %s0s = sext <vscale x 2 x i32> %s0 to <vscale x 2 x i64>
-  %s1s = sext <vscale x 2 x i32> %s1 to <vscale x 2 x i64>
-  %m = add nsw <vscale x 2 x i64> %s0s, %s1s
-  %s = lshr <vscale x 2 x i64> %m, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
-  %s2 = trunc <vscale x 2 x i64> %s to <vscale x 2 x i32>
-  ret <vscale x 2 x i32> %s2
-}
-
-define <vscale x 2 x i32> @haddu_v2i32(<vscale x 2 x i32> %s0, <vscale x 2 x i32> %s1) {
-; CHECK-LABEL: haddu_v2i32:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    and z0.d, z0.d, #0xffffffff
-; CHECK-NEXT:    and z1.d, z1.d, #0xffffffff
-; CHECK-NEXT:    uhadd z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    ret
-entry:
-  %s0s = zext <vscale x 2 x i32> %s0 to <vscale x 2 x i64>
-  %s1s = zext <vscale x 2 x i32> %s1 to <vscale x 2 x i64>
-  %m = add nuw nsw <vscale x 2 x i64> %s0s, %s1s
-  %s = lshr <vscale x 2 x i64> %m, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
-  %s2 = trunc <vscale x 2 x i64> %s to <vscale x 2 x i32>
-  ret <vscale x 2 x i32> %s2
-}
-
-define <vscale x 4 x i32> @hadds_v4i32(<vscale x 4 x i32> %s0, <vscale x 4 x i32> %s1) {
-; CHECK-LABEL: hadds_v4i32:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    shadd z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    ret
-entry:
-  %s0s = sext <vscale x 4 x i32> %s0 to <vscale x 4 x i64>
-  %s1s = sext <vscale x 4 x i32> %s1 to <vscale x 4 x i64>
-  %m = add nsw <vscale x 4 x i64> %s0s, %s1s
-  %s = lshr <vscale x 4 x i64> %m, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
-  %s2 = trunc <vscale x 4 x i64> %s to <vscale x 4 x i32>
-  ret <vscale x 4 x i32> %s2
-}
-
-define <vscale x 4 x i32> @haddu_v4i32(<vscale x 4 x i32> %s0, <vscale x 4 x i32> %s1) {
-; CHECK-LABEL: haddu_v4i32:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    uhadd z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    ret
-entry:
-  %s0s = zext <vscale x 4 x i32> %s0 to <vscale x 4 x i64>
-  %s1s = zext <vscale x 4 x i32> %s1 to <vscale x 4 x i64>
-  %m = add nuw nsw <vscale x 4 x i64> %s0s, %s1s
-  %s = lshr <vscale x 4 x i64> %m, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
-  %s2 = trunc <vscale x 4 x i64> %s to <vscale x 4 x i32>
-  ret <vscale x 4 x i32> %s2
-}
-
-define <vscale x 2 x i16> @hadds_v2i16(<vscale x 2 x i16> %s0, <vscale x 2 x i16> %s1) {
-; CHECK-LABEL: hadds_v2i16:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    sxth z0.d, p0/m, z0.d
-; CHECK-NEXT:    sxth z1.d, p0/m, z1.d
-; CHECK-NEXT:    add z0.d, z0.d, z1.d
-; CHECK-NEXT:    and z0.d, z0.d, #0xffffffff
-; CHECK-NEXT:    lsr z0.d, z0.d, #1
-; CHECK-NEXT:    ret
-entry:
-  %s0s = sext <vscale x 2 x i16> %s0 to <vscale x 2 x i32>
-  %s1s = sext <vscale x 2 x i16> %s1 to <vscale x 2 x i32>
-  %m = add nsw <vscale x 2 x i32> %s0s, %s1s
-  %s = lshr <vscale x 2 x i32> %m, shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 1, i32 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer)
-  %s2 = trunc <vscale x 2 x i32> %s to <vscale x 2 x i16>
-  ret <vscale x 2 x i16> %s2
-}
-
-define <vscale x 2 x i16> @haddu_v2i16(<vscale x 2 x i16> %s0, <vscale x 2 x i16> %s1) {
-; CHECK-LABEL: haddu_v2i16:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    and z0.d, z0.d, #0xffff
-; CHECK-NEXT:    and z1.d, z1.d, #0xffff
-; CHECK-NEXT:    uhadd z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    ret
-entry:
-  %s0s = zext <vscale x 2 x i16> %s0 to <vscale x 2 x i32>
-  %s1s = zext <vscale x 2 x i16> %s1 to <vscale x 2 x i32>
-  %m = add nuw nsw <vscale x 2 x i32> %s0s, %s1s
-  %s = lshr <vscale x 2 x i32> %m, shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 1, i32 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer)
-  %s2 = trunc <vscale x 2 x i32> %s to <vscale x 2 x i16>
-  ret <vscale x 2 x i16> %s2
-}
-
-define <vscale x 4 x i16> @hadds_v4i16(<vscale x 4 x i16> %s0, <vscale x 4 x i16> %s1) {
-; CHECK-LABEL: hadds_v4i16:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    sxth z0.s, p0/m, z0.s
-; CHECK-NEXT:    sxth z1.s, p0/m, z1.s
-; CHECK-NEXT:    add z0.s, z0.s, z1.s
-; CHECK-NEXT:    lsr z0.s, z0.s, #1
-; CHECK-NEXT:    ret
-entry:
-  %s0s = sext <vscale x 4 x i16> %s0 to <vscale x 4 x i32>
-  %s1s = sext <vscale x 4 x i16> %s1 to <vscale x 4 x i32>
-  %m = add nsw <vscale x 4 x i32> %s0s, %s1s
-  %s = lshr <vscale x 4 x i32> %m, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-  %s2 = trunc <vscale x 4 x i32> %s to <vscale x 4 x i16>
-  ret <vscale x 4 x i16> %s2
-}
-
-define <vscale x 4 x i16> @haddu_v4i16(<vscale x 4 x i16> %s0, <vscale x 4 x i16> %s1) {
-; CHECK-LABEL: haddu_v4i16:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    and z0.s, z0.s, #0xffff
-; CHECK-NEXT:    and z1.s, z1.s, #0xffff
-; CHECK-NEXT:    uhadd z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    ret
-entry:
-  %s0s = zext <vscale x 4 x i16> %s0 to <vscale x 4 x i32>
-  %s1s = zext <vscale x 4 x i16> %s1 to <vscale x 4 x i32>
-  %m = add nuw nsw <vscale x 4 x i32> %s0s, %s1s
-  %s = lshr <vscale x 4 x i32> %m, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-  %s2 = trunc <vscale x 4 x i32> %s to <vscale x 4 x i16>
-  ret <vscale x 4 x i16> %s2
-}
-
-define <vscale x 8 x i16> @hadds_v8i16(<vscale x 8 x i16> %s0, <vscale x 8 x i16> %s1) {
-; CHECK-LABEL: hadds_v8i16:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    shadd z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    ret
-entry:
-  %s0s = sext <vscale x 8 x i16> %s0 to <vscale x 8 x i32>
-  %s1s = sext <vscale x 8 x i16> %s1 to <vscale x 8 x i32>
-  %m = add nsw <vscale x 8 x i32> %s0s, %s1s
-  %s = lshr <vscale x 8 x i32> %m, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 1, i32 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
-  %s2 = trunc <vscale x 8 x i32> %s to <vscale x 8 x i16>
-  ret <vscale x 8 x i16> %s2
-}
-
-define <vscale x 8 x i16> @haddu_v8i16(<vscale x 8 x i16> %s0, <vscale x 8 x i16> %s1) {
-; CHECK-LABEL: haddu_v8i16:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    uhadd z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    ret
-entry:
-  %s0s = zext <vscale x 8 x i16> %s0 to <vscale x 8 x i32>
-  %s1s = zext <vscale x 8 x i16> %s1 to <vscale x 8 x i32>
-  %m = add nuw nsw <vscale x 8 x i32> %s0s, %s1s
-  %s = lshr <vscale x 8 x i32> %m, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 1, i32 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
-  %s2 = trunc <vscale x 8 x i32> %s to <vscale x 8 x i16>
-  ret <vscale x 8 x i16> %s2
-}
-
-define <vscale x 4 x i8> @hadds_v4i8(<vscale x 4 x i8> %s0, <vscale x 4 x i8> %s1) {
-; CHECK-LABEL: hadds_v4i8:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    sxtb z0.s, p0/m, z0.s
-; CHECK-NEXT:    sxtb z1.s, p0/m, z1.s
-; CHECK-NEXT:    add z0.s, z0.s, z1.s
-; CHECK-NEXT:    and z0.s, z0.s, #0xffff
-; CHECK-NEXT:    lsr z0.s, z0.s, #1
-; CHECK-NEXT:    ret
-entry:
-  %s0s = sext <vscale x 4 x i8> %s0 to <vscale x 4 x i16>
-  %s1s = sext <vscale x 4 x i8> %s1 to <vscale x 4 x i16>
-  %m = add nsw <vscale x 4 x i16> %s0s, %s1s
-  %s = lshr <vscale x 4 x i16> %m, shufflevector (<vscale x 4 x i16> insertelement (<vscale x 4 x i16> poison, i16 1, i32 0), <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer)
-  %s2 = trunc <vscale x 4 x i16> %s to <vscale x 4 x i8>
-  ret <vscale x 4 x i8> %s2
-}
-
-define <vscale x 4 x i8> @haddu_v4i8(<vscale x 4 x i8> %s0, <vscale x 4 x i8> %s1) {
-; CHECK-LABEL: haddu_v4i8:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    and z0.s, z0.s, #0xff
-; CHECK-NEXT:    and z1.s, z1.s, #0xff
-; CHECK-NEXT:    uhadd z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    ret
-entry:
-  %s0s = zext <vscale x 4 x i8> %s0 to <vscale x 4 x i16>
-  %s1s = zext <vscale x 4 x i8> %s1 to <vscale x 4 x i16>
-  %m = add nuw nsw <vscale x 4 x i16> %s0s, %s1s
-  %s = lshr <vscale x 4 x i16> %m, shufflevector (<vscale x 4 x i16> insertelement (<vscale x 4 x i16> poison, i16 1, i32 0), <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer)
-  %s2 = trunc <vscale x 4 x i16> %s to <vscale x 4 x i8>
-  ret <vscale x 4 x i8> %s2
-}
-
-define <vscale x 8 x i8> @hadds_v8i8(<vscale x 8 x i8> %s0, <vscale x 8 x i8> %s1) {
-; CHECK-LABEL: hadds_v8i8:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    sxtb z0.h, p0/m, z0.h
-; CHECK-NEXT:    sxtb z1.h, p0/m, z1.h
-; CHECK-NEXT:    add z0.h, z0.h, z1.h
-; CHECK-NEXT:    lsr z0.h, z0.h, #1
-; CHECK-NEXT:    ret
-entry:
-  %s0s = sext <vscale x 8 x i8> %s0 to <vscale x 8 x i16>
-  %s1s = sext <vscale x 8 x i8> %s1 to <vscale x 8 x i16>
-  %m = add nsw <vscale x 8 x i16> %s0s, %s1s
-  %s = lshr <vscale x 8 x i16> %m, shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 1, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)
-  %s2 = trunc <vscale x 8 x i16> %s to <vscale x 8 x i8>
-  ret <vscale x 8 x i8> %s2
-}
-
-define <vscale x 8 x i8> @haddu_v8i8(<vscale x 8 x i8> %s0, <vscale x 8 x i8> %s1) {
-; CHECK-LABEL: haddu_v8i8:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    and z0.h, z0.h, #0xff
-; CHECK-NEXT:    and z1.h, z1.h, #0xff
-; CHECK-NEXT:    uhadd z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    ret
-entry:
-  %s0s = zext <vscale x 8 x i8> %s0 to <vscale x 8 x i16>
-  %s1s = zext <vscale x 8 x i8> %s1 to <vscale x 8 x i16>
-  %m = add nuw nsw <vscale x 8 x i16> %s0s, %s1s
-  %s = lshr <vscale x 8 x i16> %m, shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 1, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)
-  %s2 = trunc <vscale x 8 x i16> %s to <vscale x 8 x i8>
-  ret <vscale x 8 x i8> %s2
-}
-
-define <vscale x 16 x i8> @hadds_v16i8(<vscale x 16 x i8> %s0, <vscale x 16 x i8> %s1) {
-; CHECK-LABEL: hadds_v16i8:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ptrue p0.b
-; CHECK-NEXT:    shadd z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT:    ret
-entry:
-  %s0s = sext <vscale x 16 x i8> %s0 to <vscale x 16 x i16>
-  %s1s = sext <vscale x 16 x i8> %s1 to <vscale x 16 x i16>
-  %m = add nsw <vscale x 16 x i16> %s0s, %s1s
-  %s = lshr <vscale x 16 x i16> %m, shufflevector (<vscale x 16 x i16> insertelement (<vscale x 16 x i16> poison, i16 1, i32 0), <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer)
-  %s2 = trunc <vscale x 16 x i16> %s to <vscale x 16 x i8>
-  ret <vscale x 16 x i8> %s2
-}
-
-define <vscale x 16 x i8> @haddu_v16i8(<vscale x 16 x i8> %s0, <vscale x 16 x i8> %s1) {
-; CHECK-LABEL: haddu_v16i8:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ptrue p0.b
-; CHECK-NEXT:    uhadd z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT:    ret
-entry:
-  %s0s = zext <vscale x 16 x i8> %s0 to <vscale x 16 x i16>
-  %s1s = zext <vscale x 16 x i8> %s1 to <vscale x 16 x i16>
-  %m = add nuw nsw <vscale x 16 x i16> %s0s, %s1s
-  %s = lshr <vscale x 16 x i16> %m, shufflevector (<vscale x 16 x i16> insertelement (<vscale x 16 x i16> poison, i16 1, i32 0), <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer)
-  %s2 = trunc <vscale x 16 x i16> %s to <vscale x 16 x i8>
-  ret <vscale x 16 x i8> %s2
-}
-
-define <vscale x 2 x i64> @rhadds_v2i64(<vscale x 2 x i64> %s0, <vscale x 2 x i64> %s1) {
-; CHECK-LABEL: rhadds_v2i64:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    srhadd z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    ret
-entry:
-  %s0s = sext <vscale x 2 x i64> %s0 to <vscale x 2 x i128>
-  %s1s = sext <vscale x 2 x i64> %s1 to <vscale x 2 x i128>
-  %add = add <vscale x 2 x i128> %s0s, shufflevector (<vscale x 2 x i128> insertelement (<vscale x 2 x i128> poison, i128 1, i32 0), <vscale x 2 x i128> poison, <vscale x 2 x i32> zeroinitializer)
-  %add2 = add <vscale x 2 x i128> %add, %s1s
-  %s = lshr <vscale x 2 x i128> %add2, shufflevector (<vscale x 2 x i128> insertelement (<vscale x 2 x i128> poison, i128 1, i32 0), <vscale x 2 x i128> poison, <vscale x 2 x i32> zeroinitializer)
-  %result = trunc <vscale x 2 x i128> %s to <vscale x 2 x i64>
-  ret <vscale x 2 x i64> %result
-}
-
-define <vscale x 2 x i64> @rhaddu_v2i64(<vscale x 2 x i64> %s0, <vscale x 2 x i64> %s1) {
-; CHECK-LABEL: rhaddu_v2i64:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    urhadd z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    ret
-entry:
-  %s0s = zext <vscale x 2 x i64> %s0 to <vscale x 2 x i128>
-  %s1s = zext <vscale x 2 x i64> %s1 to <vscale x 2 x i128>
-  %add = add nuw nsw <vscale x 2 x i128> %s0s, shufflevector (<vscale x 2 x i128> insertelement (<vscale x 2 x i128> poison, i128 1, i32 0), <vscale x 2 x i128> poison, <vscale x 2 x i32> zeroinitializer)
-  %add2 = add nuw nsw <vscale x 2 x i128> %add, %s1s
-  %s = lshr <vscale x 2 x i128> %add2, shufflevector (<vscale x 2 x i128> insertelement (<vscale x 2 x i128> poison, i128 1, i32 0), <vscale x 2 x i128> poison, <vscale x 2 x i32> zeroinitializer)
-  %result = trunc <vscale x 2 x i128> %s to <vscale x 2 x i64>
-  ret <vscale x 2 x i64> %result
-}
-
-define <vscale x 2 x i32> @rhadds_v2i32(<vscale x 2 x i32> %s0, <vscale x 2 x i32> %s1) {
-; CHECK-LABEL: rhadds_v2i32:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov z2.d, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    sxtw z0.d, p0/m, z0.d
-; CHECK-NEXT:    sxtw z1.d, p0/m, z1.d
-; CHECK-NEXT:    eor z0.d, z0.d, z2.d
-; CHECK-NEXT:    sub z0.d, z1.d, z0.d
-; CHECK-NEXT:    lsr z0.d, z0.d, #1
-; CHECK-NEXT:    ret
-entry:
-  %s0s = sext <vscale x 2 x i32> %s0 to <vscale x 2 x i64>
-  %s1s = sext <vscale x 2 x i32> %s1 to <vscale x 2 x i64>
-  %add = add <vscale x 2 x i64> %s0s, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
-  %add2 = add <vscale x 2 x i64> %add, %s1s
-  %s = lshr <vscale x 2 x i64> %add2, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
-  %result = trunc <vscale x 2 x i64> %s to <vscale x 2 x i32>
-  ret <vscale x 2 x i32> %result
-}
-
-define <vscale x 2 x i32> @rhaddu_v2i32(<vscale x 2 x i32> %s0, <vscale x 2 x i32> %s1) {
-; CHECK-LABEL: rhaddu_v2i32:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    and z0.d, z0.d, #0xffffffff
-; CHECK-NEXT:    and z1.d, z1.d, #0xffffffff
-; CHECK-NEXT:    urhadd z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    ret
-entry:
-  %s0s = zext <vscale x 2 x i32> %s0 to <vscale x 2 x i64>
-  %s1s = zext <vscale x 2 x i32> %s1 to <vscale x 2 x i64>
-  %add = add nuw nsw <vscale x 2 x i64> %s0s, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
-  %add2 = add nuw nsw <vscale x 2 x i64> %add, %s1s
-  %s = lshr <vscale x 2 x i64> %add2, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
-  %result = trunc <vscale x 2 x i64> %s to <vscale x 2 x i32>
-  ret <vscale x 2 x i32> %result
-}
-
-define <vscale x 4 x i32> @rhadds_v4i32(<vscale x 4 x i32> %s0, <vscale x 4 x i32> %s1) {
-; CHECK-LABEL: rhadds_v4i32:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    srhadd z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    ret
-entry:
-  %s0s = sext <vscale x 4 x i32> %s0 to <vscale x 4 x i64>
-  %s1s = sext <vscale x 4 x i32> %s1 to <vscale x 4 x i64>
-  %add = add <vscale x 4 x i64> %s0s, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
-  %add2 = add <vscale x 4 x i64> %add, %s1s
-  %s = lshr <vscale x 4 x i64> %add2, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
-  %result = trunc <vscale x 4 x i64> %s to <vscale x 4 x i32>
-  ret <vscale x 4 x i32> %result
-}
-
-define <vscale x 4 x i32> @rhaddu_v4i32(<vscale x 4 x i32> %s0, <vscale x 4 x i32> %s1) {
-; CHECK-LABEL: rhaddu_v4i32:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    urhadd z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    ret
-entry:
-  %s0s = zext <vscale x 4 x i32> %s0 to <vscale x 4 x i64>
-  %s1s = zext <vscale x 4 x i32> %s1 to <vscale x 4 x i64>
-  %add = add nuw nsw <vscale x 4 x i64> %s0s, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
-  %add2 = add nuw nsw <vscale x 4 x i64> %add, %s1s
-  %s = lshr <vscale x 4 x i64> %add2, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
-  %result = trunc <vscale x 4 x i64> %s to <vscale x 4 x i32>
-  ret <vscale x 4 x i32> %result
-}
-
-define <vscale x 2 x i16> @rhadds_v2i16(<vscale x 2 x i16> %s0, <vscale x 2 x i16> %s1) {
-; CHECK-LABEL: rhadds_v2i16:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov z2.d, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    sxth z0.d, p0/m, z0.d
-; CHECK-NEXT:    sxth z1.d, p0/m, z1.d
-; CHECK-NEXT:    eor z0.d, z0.d, z2.d
-; CHECK-NEXT:    sub z0.d, z1.d, z0.d
-; CHECK-NEXT:    and z0.d, z0.d, #0xffffffff
-; CHECK-NEXT:    lsr z0.d, z0.d, #1
-; CHECK-NEXT:    ret
-entry:
-  %s0s = sext <vscale x 2 x i16> %s0 to <vscale x 2 x i32>
-  %s1s = sext <vscale x 2 x i16> %s1 to <vscale x 2 x i32>
-  %add = add <vscale x 2 x i32> %s0s, shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 1, i32 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer)
-  %add2 = add <vscale x 2 x i32> %add, %s1s
-  %s = lshr <vscale x 2 x i32> %add2, shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 1, i32 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer)
-  %result = trunc <vscale x 2 x i32> %s to <vscale x 2 x i16>
-  ret <vscale x 2 x i16> %result
-}
-
-define <vscale x 2 x i16> @rhaddu_v2i16(<vscale x 2 x i16> %s0, <vscale x 2 x i16> %s1) {
-; CHECK-LABEL: rhaddu_v2i16:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov z2.d, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    and z0.d, z0.d, #0xffff
-; CHECK-NEXT:    and z1.d, z1.d, #0xffff
-; CHECK-NEXT:    eor z0.d, z0.d, z2.d
-; CHECK-NEXT:    sub z0.d, z1.d, z0.d
-; CHECK-NEXT:    lsr z0.d, z0.d, #1
-; CHECK-NEXT:    ret
-entry:
-  %s0s = zext <vscale x 2 x i16> %s0 to <vscale x 2 x i32>
-  %s1s = zext <vscale x 2 x i16> %s1 to <vscale x 2 x i32>
-  %add = add nuw nsw <vscale x 2 x i32> %s0s, shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 1, i32 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer)
-  %add2 = add nuw nsw <vscale x 2 x i32> %add, %s1s
-  %s = lshr <vscale x 2 x i32> %add2, shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 1, i32 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer)
-  %result = trunc <vscale x 2 x i32> %s to <vscale x 2 x i16>
-  ret <vscale x 2 x i16> %result
-}
-
-define <vscale x 4 x i16> @rhadds_v4i16(<vscale x 4 x i16> %s0, <vscale x 4 x i16> %s1) {
-; CHECK-LABEL: rhadds_v4i16:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    mov z2.s, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    sxth z0.s, p0/m, z0.s
-; CHECK-NEXT:    sxth z1.s, p0/m, z1.s
-; CHECK-NEXT:    eor z0.d, z0.d, z2.d
-; CHECK-NEXT:    sub z0.s, z1.s, z0.s
-; CHECK-NEXT:    lsr z0.s, z0.s, #1
-; CHECK-NEXT:    ret
-entry:
-  %s0s = sext <vscale x 4 x i16> %s0 to <vscale x 4 x i32>
-  %s1s = sext <vscale x 4 x i16> %s1 to <vscale x 4 x i32>
-  %add = add <vscale x 4 x i32> %s0s, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-  %add2 = add <vscale x 4 x i32> %add, %s1s
-  %s = lshr <vscale x 4 x i32> %add2, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-  %result = trunc <vscale x 4 x i32> %s to <vscale x 4 x i16>
-  ret <vscale x 4 x i16> %result
-}
-
-define <vscale x 4 x i16> @rhaddu_v4i16(<vscale x 4 x i16> %s0, <vscale x 4 x i16> %s1) {
-; CHECK-LABEL: rhaddu_v4i16:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    and z0.s, z0.s, #0xffff
-; CHECK-NEXT:    and z1.s, z1.s, #0xffff
-; CHECK-NEXT:    urhadd z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    ret
-entry:
-  %s0s = zext <vscale x 4 x i16> %s0 to <vscale x 4 x i32>
-  %s1s = zext <vscale x 4 x i16> %s1 to <vscale x 4 x i32>
-  %add = add nuw nsw <vscale x 4 x i32> %s0s, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-  %add2 = add nuw nsw <vscale x 4 x i32> %add, %s1s
-  %s = lshr <vscale x 4 x i32> %add2, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-  %result = trunc <vscale x 4 x i32> %s to <vscale x 4 x i16>
-  ret <vscale x 4 x i16> %result
-}
-
-define <vscale x 8 x i16> @rhadds_v8i16(<vscale x 8 x i16> %s0, <vscale x 8 x i16> %s1) {
-; CHECK-LABEL: rhadds_v8i16:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    srhadd z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    ret
-entry:
-  %s0s = sext <vscale x 8 x i16> %s0 to <vscale x 8 x i32>
-  %s1s = sext <vscale x 8 x i16> %s1 to <vscale x 8 x i32>
-  %add = add <vscale x 8 x i32> %s0s, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 1, i32 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
-  %add2 = add <vscale x 8 x i32> %add, %s1s
-  %s = lshr <vscale x 8 x i32> %add2, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 1, i32 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
-  %result = trunc <vscale x 8 x i32> %s to <vscale x 8 x i16>
-  ret <vscale x 8 x i16> %result
-}
-
-define <vscale x 8 x i16> @rhaddu_v8i16(<vscale x 8 x i16> %s0, <vscale x 8 x i16> %s1) {
-; CHECK-LABEL: rhaddu_v8i16:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    urhadd z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    ret
-entry:
-  %s0s = zext <vscale x 8 x i16> %s0 to <vscale x 8 x i32>
-  %s1s = zext <vscale x 8 x i16> %s1 to <vscale x 8 x i32>
-  %add = add nuw nsw <vscale x 8 x i32> %s0s, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 1, i32 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
-  %add2 = add nuw nsw <vscale x 8 x i32> %add, %s1s
-  %s = lshr <vscale x 8 x i32> %add2, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 1, i32 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
-  %result = trunc <vscale x 8 x i32> %s to <vscale x 8 x i16>
-  ret <vscale x 8 x i16> %result
-}
-
-define <vscale x 4 x i8> @rhadds_v4i8(<vscale x 4 x i8> %s0, <vscale x 4 x i8> %s1) {
-; CHECK-LABEL: rhadds_v4i8:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    mov z2.s, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    sxtb z0.s, p0/m, z0.s
-; CHECK-NEXT:    sxtb z1.s, p0/m, z1.s
-; CHECK-NEXT:    eor z0.d, z0.d, z2.d
-; CHECK-NEXT:    sub z0.s, z1.s, z0.s
-; CHECK-NEXT:    and z0.s, z0.s, #0xffff
-; CHECK-NEXT:    lsr z0.s, z0.s, #1
-; CHECK-NEXT:    ret
-entry:
-  %s0s = sext <vscale x 4 x i8> %s0 to <vscale x 4 x i16>
-  %s1s = sext <vscale x 4 x i8> %s1 to <vscale x 4 x i16>
-  %add = add <vscale x 4 x i16> %s0s, shufflevector (<vscale x 4 x i16> insertelement (<vscale x 4 x i16> poison, i16 1, i32 0), <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer)
-  %add2 = add <vscale x 4 x i16> %add, %s1s
-  %s = lshr <vscale x 4 x i16> %add2, shufflevector (<vscale x 4 x i16> insertelement (<vscale x 4 x i16> poison, i16 1, i32 0), <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer)
-  %result = trunc <vscale x 4 x i16> %s to <vscale x 4 x i8>
-  ret <vscale x 4 x i8> %result
-}
-
-define <vscale x 4 x i8> @rhaddu_v4i8(<vscale x 4 x i8> %s0, <vscale x 4 x i8> %s1) {
-; CHECK-LABEL: rhaddu_v4i8:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov z2.s, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    and z0.s, z0.s, #0xff
-; CHECK-NEXT:    and z1.s, z1.s, #0xff
-; CHECK-NEXT:    eor z0.d, z0.d, z2.d
-; CHECK-NEXT:    sub z0.s, z1.s, z0.s
-; CHECK-NEXT:    lsr z0.s, z0.s, #1
-; CHECK-NEXT:    ret
-entry:
-  %s0s = zext <vscale x 4 x i8> %s0 to <vscale x 4 x i16>
-  %s1s = zext <vscale x 4 x i8> %s1 to <vscale x 4 x i16>
-  %add = add nuw nsw <vscale x 4 x i16> %s0s, shufflevector (<vscale x 4 x i16> insertelement (<vscale x 4 x i16> poison, i16 1, i32 0), <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer)
-  %add2 = add nuw nsw <vscale x 4 x i16> %add, %s1s
-  %s = lshr <vscale x 4 x i16> %add2, shufflevector (<vscale x 4 x i16> insertelement (<vscale x 4 x i16> poison, i16 1, i32 0), <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer)
-  %result = trunc <vscale x 4 x i16> %s to <vscale x 4 x i8>
-  ret <vscale x 4 x i8> %result
-}
-
-define <vscale x 8 x i8> @rhadds_v8i8(<vscale x 8 x i8> %s0, <vscale x 8 x i8> %s1) {
-; CHECK-LABEL: rhadds_v8i8:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    mov z2.h, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    sxtb z0.h, p0/m, z0.h
-; CHECK-NEXT:    sxtb z1.h, p0/m, z1.h
-; CHECK-NEXT:    eor z0.d, z0.d, z2.d
-; CHECK-NEXT:    sub z0.h, z1.h, z0.h
-; CHECK-NEXT:    lsr z0.h, z0.h, #1
-; CHECK-NEXT:    ret
-entry:
-  %s0s = sext <vscale x 8 x i8> %s0 to <vscale x 8 x i16>
-  %s1s = sext <vscale x 8 x i8> %s1 to <vscale x 8 x i16>
-  %add = add <vscale x 8 x i16> %s0s, shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 1, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)
-  %add2 = add <vscale x 8 x i16> %add, %s1s
-  %s = lshr <vscale x 8 x i16> %add2, shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 1, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)
-  %result = trunc <vscale x 8 x i16> %s to <vscale x 8 x i8>
-  ret <vscale x 8 x i8> %result
-}
-
-define <vscale x 8 x i8> @rhaddu_v8i8(<vscale x 8 x i8> %s0, <vscale x 8 x i8> %s1) {
-; CHECK-LABEL: rhaddu_v8i8:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    and z0.h, z0.h, #0xff
-; CHECK-NEXT:    and z1.h, z1.h, #0xff
-; CHECK-NEXT:    urhadd z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    ret
-entry:
-  %s0s = zext <vscale x 8 x i8> %s0 to <vscale x 8 x i16>
-  %s1s = zext <vscale x 8 x i8> %s1 to <vscale x 8 x i16>
-  %add = add nuw nsw <vscale x 8 x i16> %s0s, shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 1, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)
-  %add2 = add nuw nsw <vscale x 8 x i16> %add, %s1s
-  %s = lshr <vscale x 8 x i16> %add2, shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 1, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)
-  %result = trunc <vscale x 8 x i16> %s to <vscale x 8 x i8>
-  ret <vscale x 8 x i8> %result
-}
-
-define <vscale x 16 x i8> @rhadds_v16i8(<vscale x 16 x i8> %s0, <vscale x 16 x i8> %s1) {
-; CHECK-LABEL: rhadds_v16i8:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ptrue p0.b
-; CHECK-NEXT:    srhadd z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT:    ret
-entry:
-  %s0s = sext <vscale x 16 x i8> %s0 to <vscale x 16 x i16>
-  %s1s = sext <vscale x 16 x i8> %s1 to <vscale x 16 x i16>
-  %add = add <vscale x 16 x i16> %s0s, shufflevector (<vscale x 16 x i16> insertelement (<vscale x 16 x i16> poison, i16 1, i32 0), <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer)
-  %add2 = add <vscale x 16 x i16> %add, %s1s
-  %s = lshr <vscale x 16 x i16> %add2, shufflevector (<vscale x 16 x i16> insertelement (<vscale x 16 x i16> poison, i16 1, i32 0), <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer)
-  %result = trunc <vscale x 16 x i16> %s to <vscale x 16 x i8>
-  ret <vscale x 16 x i8> %result
-}
-
-define <vscale x 16 x i8> @rhaddu_v16i8(<vscale x 16 x i8> %s0, <vscale x 16 x i8> %s1) {
-; CHECK-LABEL: rhaddu_v16i8:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ptrue p0.b
-; CHECK-NEXT:    urhadd z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT:    ret
-entry:
-  %s0s = zext <vscale x 16 x i8> %s0 to <vscale x 16 x i16>
-  %s1s = zext <vscale x 16 x i8> %s1 to <vscale x 16 x i16>
-  %add = add nuw nsw <vscale x 16 x i16> %s0s, shufflevector (<vscale x 16 x i16> insertelement (<vscale x 16 x i16> poison, i16 1, i32 0), <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer)
-  %add2 = add nuw nsw <vscale x 16 x i16> %add, %s1s
-  %s = lshr <vscale x 16 x i16> %add2, shufflevector (<vscale x 16 x i16> insertelement (<vscale x 16 x i16> poison, i16 1, i32 0), <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer)
-  %result = trunc <vscale x 16 x i16> %s to <vscale x 16 x i8>
-  ret <vscale x 16 x i8> %result
-}


        


More information about the llvm-commits mailing list