[llvm] c155fc8 - [AArch64][SVE] Add some hadd sve codegen tests. NFC

David Green via llvm-commits llvm-commits at lists.llvm.org
Mon Dec 12 06:35:07 PST 2022


Author: David Green
Date: 2022-12-12T14:35:01Z
New Revision: c155fc84142cead6227d369c5e7d5d9a086c9070

URL: https://github.com/llvm/llvm-project/commit/c155fc84142cead6227d369c5e7d5d9a086c9070
DIFF: https://github.com/llvm/llvm-project/commit/c155fc84142cead6227d369c5e7d5d9a086c9070.diff

LOG: [AArch64][SVE] Add some hadd sve codegen tests. NFC

Added: 
    llvm/test/CodeGen/AArch64/sve2-hadd.ll

Modified: 
    

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/AArch64/sve2-hadd.ll b/llvm/test/CodeGen/AArch64/sve2-hadd.ll
new file mode 100644
index 000000000000..2d494c43d4ce
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2-hadd.ll
@@ -0,0 +1,672 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple aarch64-none-eabi -mattr=+sve2 -o - | FileCheck %s
+
+define <vscale x 2 x i32> @hadds_v2i32(<vscale x 2 x i32> %s0, <vscale x 2 x i32> %s1) { ; signed halving add: trunc((sext(%s0) + sext(%s1)) >> 1), computed in i64
+; CHECK-LABEL: hadds_v2i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    sxtw z0.d, p0/m, z0.d
+; CHECK-NEXT:    adr z0.d, [z0.d, z1.d, sxtw]
+; CHECK-NEXT:    lsr z0.d, z0.d, #1
+; CHECK-NEXT:    ret
+entry:
+  %s0s = sext <vscale x 2 x i32> %s0 to <vscale x 2 x i64> ; widen first operand (sign-extend)
+  %s1s = sext <vscale x 2 x i32> %s1 to <vscale x 2 x i64> ; widen second operand (sign-extend)
+  %m = add <vscale x 2 x i64> %s0s, %s1s ; sum in the wide type
+  %s = lshr <vscale x 2 x i64> %m, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) ; logical shift by a splat of 1; the only bit where ashr would differ is dropped by the trunc
+  %s2 = trunc <vscale x 2 x i64> %s to <vscale x 2 x i32> ; narrow back to i32 elements
+  ret <vscale x 2 x i32> %s2
+}
+
+define <vscale x 2 x i32> @haddu_v2i32(<vscale x 2 x i32> %s0, <vscale x 2 x i32> %s1) { ; unsigned halving add: trunc((zext(%s0) + zext(%s1)) >> 1), computed in i64
+; CHECK-LABEL: haddu_v2i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    and z0.d, z0.d, #0xffffffff
+; CHECK-NEXT:    adr z0.d, [z0.d, z1.d, uxtw]
+; CHECK-NEXT:    lsr z0.d, z0.d, #1
+; CHECK-NEXT:    ret
+entry:
+  %s0s = zext <vscale x 2 x i32> %s0 to <vscale x 2 x i64> ; widen first operand (zero-extend)
+  %s1s = zext <vscale x 2 x i32> %s1 to <vscale x 2 x i64> ; widen second operand (zero-extend)
+  %m = add <vscale x 2 x i64> %s0s, %s1s ; sum in the wide type
+  %s = lshr <vscale x 2 x i64> %m, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) ; shift right by a splat of 1
+  %s2 = trunc <vscale x 2 x i64> %s to <vscale x 2 x i32> ; narrow back to i32 elements
+  ret <vscale x 2 x i32> %s2
+}
+
+define <vscale x 4 x i32> @hadds_v4i32(<vscale x 4 x i32> %s0, <vscale x 4 x i32> %s1) { ; signed halving add: trunc((sext(%s0) + sext(%s1)) >> 1), computed in i64
+; CHECK-LABEL: hadds_v4i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sunpkhi z2.d, z0.s
+; CHECK-NEXT:    sunpklo z0.d, z0.s
+; CHECK-NEXT:    sunpkhi z3.d, z1.s
+; CHECK-NEXT:    sunpklo z1.d, z1.s
+; CHECK-NEXT:    add z0.d, z0.d, z1.d
+; CHECK-NEXT:    add z1.d, z2.d, z3.d
+; CHECK-NEXT:    lsr z1.d, z1.d, #1
+; CHECK-NEXT:    lsr z0.d, z0.d, #1
+; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
+entry:
+  %s0s = sext <vscale x 4 x i32> %s0 to <vscale x 4 x i64> ; widen first operand (sign-extend)
+  %s1s = sext <vscale x 4 x i32> %s1 to <vscale x 4 x i64> ; widen second operand (sign-extend)
+  %m = add <vscale x 4 x i64> %s0s, %s1s ; sum in the wide type
+  %s = lshr <vscale x 4 x i64> %m, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) ; logical shift by a splat of 1; the only bit where ashr would differ is dropped by the trunc
+  %s2 = trunc <vscale x 4 x i64> %s to <vscale x 4 x i32> ; narrow back to i32 elements
+  ret <vscale x 4 x i32> %s2
+}
+
+define <vscale x 4 x i32> @haddu_v4i32(<vscale x 4 x i32> %s0, <vscale x 4 x i32> %s1) { ; unsigned halving add: trunc((zext(%s0) + zext(%s1)) >> 1), computed in i64
+; CHECK-LABEL: haddu_v4i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    uunpkhi z2.d, z0.s
+; CHECK-NEXT:    uunpklo z0.d, z0.s
+; CHECK-NEXT:    uunpkhi z3.d, z1.s
+; CHECK-NEXT:    uunpklo z1.d, z1.s
+; CHECK-NEXT:    add z0.d, z0.d, z1.d
+; CHECK-NEXT:    add z1.d, z2.d, z3.d
+; CHECK-NEXT:    lsr z1.d, z1.d, #1
+; CHECK-NEXT:    lsr z0.d, z0.d, #1
+; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
+entry:
+  %s0s = zext <vscale x 4 x i32> %s0 to <vscale x 4 x i64> ; widen first operand (zero-extend)
+  %s1s = zext <vscale x 4 x i32> %s1 to <vscale x 4 x i64> ; widen second operand (zero-extend)
+  %m = add <vscale x 4 x i64> %s0s, %s1s ; sum in the wide type
+  %s = lshr <vscale x 4 x i64> %m, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) ; shift right by a splat of 1
+  %s2 = trunc <vscale x 4 x i64> %s to <vscale x 4 x i32> ; narrow back to i32 elements
+  ret <vscale x 4 x i32> %s2
+}
+
+define <vscale x 2 x i16> @hadds_v2i16(<vscale x 2 x i16> %s0, <vscale x 2 x i16> %s1) { ; signed halving add: trunc((sext(%s0) + sext(%s1)) >> 1), computed in i32
+; CHECK-LABEL: hadds_v2i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    sxth z0.d, p0/m, z0.d
+; CHECK-NEXT:    sxth z1.d, p0/m, z1.d
+; CHECK-NEXT:    add z0.d, z0.d, z1.d
+; CHECK-NEXT:    and z0.d, z0.d, #0xffffffff
+; CHECK-NEXT:    lsr z0.d, z0.d, #1
+; CHECK-NEXT:    ret
+entry:
+  %s0s = sext <vscale x 2 x i16> %s0 to <vscale x 2 x i32> ; widen first operand (sign-extend)
+  %s1s = sext <vscale x 2 x i16> %s1 to <vscale x 2 x i32> ; widen second operand (sign-extend)
+  %m = add <vscale x 2 x i32> %s0s, %s1s ; sum in the wide type
+  %s = lshr <vscale x 2 x i32> %m, shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 1, i32 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer) ; logical shift by a splat of 1; the only bit where ashr would differ is dropped by the trunc
+  %s2 = trunc <vscale x 2 x i32> %s to <vscale x 2 x i16> ; narrow back to i16 elements
+  ret <vscale x 2 x i16> %s2
+}
+
+define <vscale x 2 x i16> @haddu_v2i16(<vscale x 2 x i16> %s0, <vscale x 2 x i16> %s1) { ; unsigned halving add: trunc((zext(%s0) + zext(%s1)) >> 1), computed in i32
+; CHECK-LABEL: haddu_v2i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    and z0.d, z0.d, #0xffff
+; CHECK-NEXT:    and z1.d, z1.d, #0xffff
+; CHECK-NEXT:    add z0.d, z0.d, z1.d
+; CHECK-NEXT:    lsr z0.d, z0.d, #1
+; CHECK-NEXT:    ret
+entry:
+  %s0s = zext <vscale x 2 x i16> %s0 to <vscale x 2 x i32> ; widen first operand (zero-extend)
+  %s1s = zext <vscale x 2 x i16> %s1 to <vscale x 2 x i32> ; widen second operand (zero-extend)
+  %m = add <vscale x 2 x i32> %s0s, %s1s ; sum in the wide type
+  %s = lshr <vscale x 2 x i32> %m, shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 1, i32 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer) ; shift right by a splat of 1
+  %s2 = trunc <vscale x 2 x i32> %s to <vscale x 2 x i16> ; narrow back to i16 elements
+  ret <vscale x 2 x i16> %s2
+}
+
+define <vscale x 4 x i16> @hadds_v4i16(<vscale x 4 x i16> %s0, <vscale x 4 x i16> %s1) { ; signed halving add: trunc((sext(%s0) + sext(%s1)) >> 1), computed in i32
+; CHECK-LABEL: hadds_v4i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    sxth z0.s, p0/m, z0.s
+; CHECK-NEXT:    sxth z1.s, p0/m, z1.s
+; CHECK-NEXT:    add z0.s, z0.s, z1.s
+; CHECK-NEXT:    lsr z0.s, z0.s, #1
+; CHECK-NEXT:    ret
+entry:
+  %s0s = sext <vscale x 4 x i16> %s0 to <vscale x 4 x i32> ; widen first operand (sign-extend)
+  %s1s = sext <vscale x 4 x i16> %s1 to <vscale x 4 x i32> ; widen second operand (sign-extend)
+  %m = add <vscale x 4 x i32> %s0s, %s1s ; sum in the wide type
+  %s = lshr <vscale x 4 x i32> %m, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) ; logical shift by a splat of 1; the only bit where ashr would differ is dropped by the trunc
+  %s2 = trunc <vscale x 4 x i32> %s to <vscale x 4 x i16> ; narrow back to i16 elements
+  ret <vscale x 4 x i16> %s2
+}
+
+define <vscale x 4 x i16> @haddu_v4i16(<vscale x 4 x i16> %s0, <vscale x 4 x i16> %s1) { ; unsigned halving add: trunc((zext(%s0) + zext(%s1)) >> 1), computed in i32
+; CHECK-LABEL: haddu_v4i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    and z0.s, z0.s, #0xffff
+; CHECK-NEXT:    and z1.s, z1.s, #0xffff
+; CHECK-NEXT:    add z0.s, z0.s, z1.s
+; CHECK-NEXT:    lsr z0.s, z0.s, #1
+; CHECK-NEXT:    ret
+entry:
+  %s0s = zext <vscale x 4 x i16> %s0 to <vscale x 4 x i32> ; widen first operand (zero-extend)
+  %s1s = zext <vscale x 4 x i16> %s1 to <vscale x 4 x i32> ; widen second operand (zero-extend)
+  %m = add <vscale x 4 x i32> %s0s, %s1s ; sum in the wide type
+  %s = lshr <vscale x 4 x i32> %m, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) ; shift right by a splat of 1
+  %s2 = trunc <vscale x 4 x i32> %s to <vscale x 4 x i16> ; narrow back to i16 elements
+  ret <vscale x 4 x i16> %s2
+}
+
+define <vscale x 8 x i16> @hadds_v8i16(<vscale x 8 x i16> %s0, <vscale x 8 x i16> %s1) { ; signed halving add: trunc((sext(%s0) + sext(%s1)) >> 1), computed in i32
+; CHECK-LABEL: hadds_v8i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sunpkhi z2.s, z0.h
+; CHECK-NEXT:    sunpklo z0.s, z0.h
+; CHECK-NEXT:    sunpkhi z3.s, z1.h
+; CHECK-NEXT:    sunpklo z1.s, z1.h
+; CHECK-NEXT:    add z0.s, z0.s, z1.s
+; CHECK-NEXT:    add z1.s, z2.s, z3.s
+; CHECK-NEXT:    lsr z1.s, z1.s, #1
+; CHECK-NEXT:    lsr z0.s, z0.s, #1
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
+entry:
+  %s0s = sext <vscale x 8 x i16> %s0 to <vscale x 8 x i32> ; widen first operand (sign-extend)
+  %s1s = sext <vscale x 8 x i16> %s1 to <vscale x 8 x i32> ; widen second operand (sign-extend)
+  %m = add <vscale x 8 x i32> %s0s, %s1s ; sum in the wide type
+  %s = lshr <vscale x 8 x i32> %m, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 1, i32 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer) ; logical shift by a splat of 1; the only bit where ashr would differ is dropped by the trunc
+  %s2 = trunc <vscale x 8 x i32> %s to <vscale x 8 x i16> ; narrow back to i16 elements
+  ret <vscale x 8 x i16> %s2
+}
+
+define <vscale x 8 x i16> @haddu_v8i16(<vscale x 8 x i16> %s0, <vscale x 8 x i16> %s1) { ; unsigned halving add: trunc((zext(%s0) + zext(%s1)) >> 1), computed in i32
+; CHECK-LABEL: haddu_v8i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    uunpkhi z2.s, z0.h
+; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    uunpkhi z3.s, z1.h
+; CHECK-NEXT:    uunpklo z1.s, z1.h
+; CHECK-NEXT:    add z0.s, z0.s, z1.s
+; CHECK-NEXT:    add z1.s, z2.s, z3.s
+; CHECK-NEXT:    lsr z1.s, z1.s, #1
+; CHECK-NEXT:    lsr z0.s, z0.s, #1
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
+entry:
+  %s0s = zext <vscale x 8 x i16> %s0 to <vscale x 8 x i32> ; widen first operand (zero-extend)
+  %s1s = zext <vscale x 8 x i16> %s1 to <vscale x 8 x i32> ; widen second operand (zero-extend)
+  %m = add <vscale x 8 x i32> %s0s, %s1s ; sum in the wide type
+  %s = lshr <vscale x 8 x i32> %m, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 1, i32 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer) ; shift right by a splat of 1
+  %s2 = trunc <vscale x 8 x i32> %s to <vscale x 8 x i16> ; narrow back to i16 elements
+  ret <vscale x 8 x i16> %s2
+}
+
+define <vscale x 4 x i8> @hadds_v4i8(<vscale x 4 x i8> %s0, <vscale x 4 x i8> %s1) { ; signed halving add: trunc((sext(%s0) + sext(%s1)) >> 1), computed in i16
+; CHECK-LABEL: hadds_v4i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    sxtb z0.s, p0/m, z0.s
+; CHECK-NEXT:    sxtb z1.s, p0/m, z1.s
+; CHECK-NEXT:    add z0.s, z0.s, z1.s
+; CHECK-NEXT:    and z0.s, z0.s, #0xffff
+; CHECK-NEXT:    lsr z0.s, z0.s, #1
+; CHECK-NEXT:    ret
+entry:
+  %s0s = sext <vscale x 4 x i8> %s0 to <vscale x 4 x i16> ; widen first operand (sign-extend)
+  %s1s = sext <vscale x 4 x i8> %s1 to <vscale x 4 x i16> ; widen second operand (sign-extend)
+  %m = add <vscale x 4 x i16> %s0s, %s1s ; sum in the wide type
+  %s = lshr <vscale x 4 x i16> %m, shufflevector (<vscale x 4 x i16> insertelement (<vscale x 4 x i16> poison, i16 1, i32 0), <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer) ; logical shift by a splat of 1; the only bit where ashr would differ is dropped by the trunc
+  %s2 = trunc <vscale x 4 x i16> %s to <vscale x 4 x i8> ; narrow back to i8 elements
+  ret <vscale x 4 x i8> %s2
+}
+
+define <vscale x 4 x i8> @haddu_v4i8(<vscale x 4 x i8> %s0, <vscale x 4 x i8> %s1) { ; unsigned halving add: trunc((zext(%s0) + zext(%s1)) >> 1), computed in i16
+; CHECK-LABEL: haddu_v4i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    and z0.s, z0.s, #0xff
+; CHECK-NEXT:    and z1.s, z1.s, #0xff
+; CHECK-NEXT:    add z0.s, z0.s, z1.s
+; CHECK-NEXT:    lsr z0.s, z0.s, #1
+; CHECK-NEXT:    ret
+entry:
+  %s0s = zext <vscale x 4 x i8> %s0 to <vscale x 4 x i16> ; widen first operand (zero-extend)
+  %s1s = zext <vscale x 4 x i8> %s1 to <vscale x 4 x i16> ; widen second operand (zero-extend)
+  %m = add <vscale x 4 x i16> %s0s, %s1s ; sum in the wide type
+  %s = lshr <vscale x 4 x i16> %m, shufflevector (<vscale x 4 x i16> insertelement (<vscale x 4 x i16> poison, i16 1, i32 0), <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer) ; shift right by a splat of 1
+  %s2 = trunc <vscale x 4 x i16> %s to <vscale x 4 x i8> ; narrow back to i8 elements
+  ret <vscale x 4 x i8> %s2
+}
+
+define <vscale x 8 x i8> @hadds_v8i8(<vscale x 8 x i8> %s0, <vscale x 8 x i8> %s1) { ; signed halving add: trunc((sext(%s0) + sext(%s1)) >> 1), computed in i16
+; CHECK-LABEL: hadds_v8i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    sxtb z0.h, p0/m, z0.h
+; CHECK-NEXT:    sxtb z1.h, p0/m, z1.h
+; CHECK-NEXT:    add z0.h, z0.h, z1.h
+; CHECK-NEXT:    lsr z0.h, z0.h, #1
+; CHECK-NEXT:    ret
+entry:
+  %s0s = sext <vscale x 8 x i8> %s0 to <vscale x 8 x i16> ; widen first operand (sign-extend)
+  %s1s = sext <vscale x 8 x i8> %s1 to <vscale x 8 x i16> ; widen second operand (sign-extend)
+  %m = add <vscale x 8 x i16> %s0s, %s1s ; sum in the wide type
+  %s = lshr <vscale x 8 x i16> %m, shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 1, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer) ; logical shift by a splat of 1; the only bit where ashr would differ is dropped by the trunc
+  %s2 = trunc <vscale x 8 x i16> %s to <vscale x 8 x i8> ; narrow back to i8 elements
+  ret <vscale x 8 x i8> %s2
+}
+
+define <vscale x 8 x i8> @haddu_v8i8(<vscale x 8 x i8> %s0, <vscale x 8 x i8> %s1) { ; unsigned halving add: trunc((zext(%s0) + zext(%s1)) >> 1), computed in i16
+; CHECK-LABEL: haddu_v8i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    and z0.h, z0.h, #0xff
+; CHECK-NEXT:    and z1.h, z1.h, #0xff
+; CHECK-NEXT:    add z0.h, z0.h, z1.h
+; CHECK-NEXT:    lsr z0.h, z0.h, #1
+; CHECK-NEXT:    ret
+entry:
+  %s0s = zext <vscale x 8 x i8> %s0 to <vscale x 8 x i16> ; widen first operand (zero-extend)
+  %s1s = zext <vscale x 8 x i8> %s1 to <vscale x 8 x i16> ; widen second operand (zero-extend)
+  %m = add <vscale x 8 x i16> %s0s, %s1s ; sum in the wide type
+  %s = lshr <vscale x 8 x i16> %m, shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 1, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer) ; shift right by a splat of 1
+  %s2 = trunc <vscale x 8 x i16> %s to <vscale x 8 x i8> ; narrow back to i8 elements
+  ret <vscale x 8 x i8> %s2
+}
+
+define <vscale x 16 x i8> @hadds_v16i8(<vscale x 16 x i8> %s0, <vscale x 16 x i8> %s1) { ; signed halving add: trunc((sext(%s0) + sext(%s1)) >> 1), computed in i16
+; CHECK-LABEL: hadds_v16i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sunpkhi z2.h, z0.b
+; CHECK-NEXT:    sunpklo z0.h, z0.b
+; CHECK-NEXT:    sunpkhi z3.h, z1.b
+; CHECK-NEXT:    sunpklo z1.h, z1.b
+; CHECK-NEXT:    add z0.h, z0.h, z1.h
+; CHECK-NEXT:    add z1.h, z2.h, z3.h
+; CHECK-NEXT:    lsr z1.h, z1.h, #1
+; CHECK-NEXT:    lsr z0.h, z0.h, #1
+; CHECK-NEXT:    uzp1 z0.b, z0.b, z1.b
+; CHECK-NEXT:    ret
+entry:
+  %s0s = sext <vscale x 16 x i8> %s0 to <vscale x 16 x i16> ; widen first operand (sign-extend)
+  %s1s = sext <vscale x 16 x i8> %s1 to <vscale x 16 x i16> ; widen second operand (sign-extend)
+  %m = add <vscale x 16 x i16> %s0s, %s1s ; sum in the wide type
+  %s = lshr <vscale x 16 x i16> %m, shufflevector (<vscale x 16 x i16> insertelement (<vscale x 16 x i16> poison, i16 1, i32 0), <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer) ; logical shift by a splat of 1; the only bit where ashr would differ is dropped by the trunc
+  %s2 = trunc <vscale x 16 x i16> %s to <vscale x 16 x i8> ; narrow back to i8 elements
+  ret <vscale x 16 x i8> %s2
+}
+
+define <vscale x 16 x i8> @haddu_v16i8(<vscale x 16 x i8> %s0, <vscale x 16 x i8> %s1) { ; unsigned halving add: trunc((zext(%s0) + zext(%s1)) >> 1), computed in i16
+; CHECK-LABEL: haddu_v16i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    uunpkhi z2.h, z0.b
+; CHECK-NEXT:    uunpklo z0.h, z0.b
+; CHECK-NEXT:    uunpkhi z3.h, z1.b
+; CHECK-NEXT:    uunpklo z1.h, z1.b
+; CHECK-NEXT:    add z0.h, z0.h, z1.h
+; CHECK-NEXT:    add z1.h, z2.h, z3.h
+; CHECK-NEXT:    lsr z1.h, z1.h, #1
+; CHECK-NEXT:    lsr z0.h, z0.h, #1
+; CHECK-NEXT:    uzp1 z0.b, z0.b, z1.b
+; CHECK-NEXT:    ret
+entry:
+  %s0s = zext <vscale x 16 x i8> %s0 to <vscale x 16 x i16> ; widen first operand (zero-extend)
+  %s1s = zext <vscale x 16 x i8> %s1 to <vscale x 16 x i16> ; widen second operand (zero-extend)
+  %m = add <vscale x 16 x i16> %s0s, %s1s ; sum in the wide type
+  %s = lshr <vscale x 16 x i16> %m, shufflevector (<vscale x 16 x i16> insertelement (<vscale x 16 x i16> poison, i16 1, i32 0), <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer) ; shift right by a splat of 1
+  %s2 = trunc <vscale x 16 x i16> %s to <vscale x 16 x i8> ; narrow back to i8 elements
+  ret <vscale x 16 x i8> %s2
+}
+
+define <vscale x 2 x i32> @rhadds_v2i32(<vscale x 2 x i32> %s0, <vscale x 2 x i32> %s1) { ; signed rounding halving add: trunc((sext(%s0) + sext(%s1) + 1) >> 1), computed in i64
+; CHECK-LABEL: rhadds_v2i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    mov z2.d, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    sxtw z0.d, p0/m, z0.d
+; CHECK-NEXT:    sxtw z1.d, p0/m, z1.d
+; CHECK-NEXT:    eor z0.d, z0.d, z2.d
+; CHECK-NEXT:    sub z0.d, z1.d, z0.d
+; CHECK-NEXT:    lsr z0.d, z0.d, #1
+; CHECK-NEXT:    ret
+entry:
+  %s0s = sext <vscale x 2 x i32> %s0 to <vscale x 2 x i64> ; widen first operand (sign-extend)
+  %s1s = sext <vscale x 2 x i32> %s1 to <vscale x 2 x i64> ; widen second operand (sign-extend)
+  %add = add <vscale x 2 x i64> %s0s, %s1s ; sum in the wide type
+  %add2 = add <vscale x 2 x i64> %add, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) ; add a splat of 1 (rounding bias)
+  %s = lshr <vscale x 2 x i64> %add2, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) ; logical shift by a splat of 1; the only bit where ashr would differ is dropped by the trunc
+  %result = trunc <vscale x 2 x i64> %s to <vscale x 2 x i32> ; narrow back to i32 elements
+  ret <vscale x 2 x i32> %result
+}
+
+define <vscale x 2 x i32> @rhaddu_v2i32(<vscale x 2 x i32> %s0, <vscale x 2 x i32> %s1) { ; unsigned rounding halving add: trunc((zext(%s0) + zext(%s1) + 1) >> 1), computed in i64
+; CHECK-LABEL: rhaddu_v2i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov z2.d, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    and z0.d, z0.d, #0xffffffff
+; CHECK-NEXT:    and z1.d, z1.d, #0xffffffff
+; CHECK-NEXT:    eor z0.d, z0.d, z2.d
+; CHECK-NEXT:    sub z0.d, z1.d, z0.d
+; CHECK-NEXT:    lsr z0.d, z0.d, #1
+; CHECK-NEXT:    ret
+entry:
+  %s0s = zext <vscale x 2 x i32> %s0 to <vscale x 2 x i64> ; widen first operand (zero-extend)
+  %s1s = zext <vscale x 2 x i32> %s1 to <vscale x 2 x i64> ; widen second operand (zero-extend)
+  %add = add <vscale x 2 x i64> %s0s, %s1s ; sum in the wide type
+  %add2 = add <vscale x 2 x i64> %add, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) ; add a splat of 1 (rounding bias)
+  %s = lshr <vscale x 2 x i64> %add2, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) ; shift right by a splat of 1
+  %result = trunc <vscale x 2 x i64> %s to <vscale x 2 x i32> ; narrow back to i32 elements
+  ret <vscale x 2 x i32> %result
+}
+
+define <vscale x 4 x i32> @rhadds_v4i32(<vscale x 4 x i32> %s0, <vscale x 4 x i32> %s1) { ; signed rounding halving add: trunc((sext(%s0) + sext(%s1) + 1) >> 1), computed in i64
+; CHECK-LABEL: rhadds_v4i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov z2.d, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    sunpkhi z3.d, z0.s
+; CHECK-NEXT:    sunpklo z0.d, z0.s
+; CHECK-NEXT:    sunpkhi z4.d, z1.s
+; CHECK-NEXT:    sunpklo z1.d, z1.s
+; CHECK-NEXT:    eor z0.d, z0.d, z2.d
+; CHECK-NEXT:    eor z2.d, z3.d, z2.d
+; CHECK-NEXT:    sub z0.d, z1.d, z0.d
+; CHECK-NEXT:    sub z1.d, z4.d, z2.d
+; CHECK-NEXT:    lsr z0.d, z0.d, #1
+; CHECK-NEXT:    lsr z1.d, z1.d, #1
+; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
+entry:
+  %s0s = sext <vscale x 4 x i32> %s0 to <vscale x 4 x i64> ; widen first operand (sign-extend)
+  %s1s = sext <vscale x 4 x i32> %s1 to <vscale x 4 x i64> ; widen second operand (sign-extend)
+  %add = add <vscale x 4 x i64> %s0s, %s1s ; sum in the wide type
+  %add2 = add <vscale x 4 x i64> %add, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) ; add a splat of 1 (rounding bias)
+  %s = lshr <vscale x 4 x i64> %add2, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) ; logical shift by a splat of 1; the only bit where ashr would differ is dropped by the trunc
+  %result = trunc <vscale x 4 x i64> %s to <vscale x 4 x i32> ; narrow back to i32 elements
+  ret <vscale x 4 x i32> %result
+}
+
+define <vscale x 4 x i32> @rhaddu_v4i32(<vscale x 4 x i32> %s0, <vscale x 4 x i32> %s1) { ; unsigned rounding halving add: trunc((zext(%s0) + zext(%s1) + 1) >> 1), computed in i64
+; CHECK-LABEL: rhaddu_v4i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov z2.d, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    uunpkhi z3.d, z0.s
+; CHECK-NEXT:    uunpklo z0.d, z0.s
+; CHECK-NEXT:    uunpkhi z4.d, z1.s
+; CHECK-NEXT:    uunpklo z1.d, z1.s
+; CHECK-NEXT:    eor z0.d, z0.d, z2.d
+; CHECK-NEXT:    eor z2.d, z3.d, z2.d
+; CHECK-NEXT:    sub z0.d, z1.d, z0.d
+; CHECK-NEXT:    sub z1.d, z4.d, z2.d
+; CHECK-NEXT:    lsr z0.d, z0.d, #1
+; CHECK-NEXT:    lsr z1.d, z1.d, #1
+; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
+entry:
+  %s0s = zext <vscale x 4 x i32> %s0 to <vscale x 4 x i64> ; widen first operand (zero-extend)
+  %s1s = zext <vscale x 4 x i32> %s1 to <vscale x 4 x i64> ; widen second operand (zero-extend)
+  %add = add <vscale x 4 x i64> %s0s, %s1s ; sum in the wide type
+  %add2 = add <vscale x 4 x i64> %add, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) ; add a splat of 1 (rounding bias)
+  %s = lshr <vscale x 4 x i64> %add2, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) ; shift right by a splat of 1
+  %result = trunc <vscale x 4 x i64> %s to <vscale x 4 x i32> ; narrow back to i32 elements
+  ret <vscale x 4 x i32> %result
+}
+
+define <vscale x 2 x i16> @rhadds_v2i16(<vscale x 2 x i16> %s0, <vscale x 2 x i16> %s1) { ; signed rounding halving add: trunc((sext(%s0) + sext(%s1) + 1) >> 1), computed in i32
+; CHECK-LABEL: rhadds_v2i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    mov z2.d, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    sxth z0.d, p0/m, z0.d
+; CHECK-NEXT:    sxth z1.d, p0/m, z1.d
+; CHECK-NEXT:    eor z0.d, z0.d, z2.d
+; CHECK-NEXT:    sub z0.d, z1.d, z0.d
+; CHECK-NEXT:    and z0.d, z0.d, #0xffffffff
+; CHECK-NEXT:    lsr z0.d, z0.d, #1
+; CHECK-NEXT:    ret
+entry:
+  %s0s = sext <vscale x 2 x i16> %s0 to <vscale x 2 x i32> ; widen first operand (sign-extend)
+  %s1s = sext <vscale x 2 x i16> %s1 to <vscale x 2 x i32> ; widen second operand (sign-extend)
+  %add = add <vscale x 2 x i32> %s0s, %s1s ; sum in the wide type
+  %add2 = add <vscale x 2 x i32> %add, shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 1, i32 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer) ; add a splat of 1 (rounding bias)
+  %s = lshr <vscale x 2 x i32> %add2, shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 1, i32 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer) ; logical shift by a splat of 1; the only bit where ashr would differ is dropped by the trunc
+  %result = trunc <vscale x 2 x i32> %s to <vscale x 2 x i16> ; narrow back to i16 elements
+  ret <vscale x 2 x i16> %result
+}
+
+define <vscale x 2 x i16> @rhaddu_v2i16(<vscale x 2 x i16> %s0, <vscale x 2 x i16> %s1) { ; unsigned rounding halving add: trunc((zext(%s0) + zext(%s1) + 1) >> 1), computed in i32
+; CHECK-LABEL: rhaddu_v2i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov z2.d, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    and z0.d, z0.d, #0xffff
+; CHECK-NEXT:    and z1.d, z1.d, #0xffff
+; CHECK-NEXT:    eor z0.d, z0.d, z2.d
+; CHECK-NEXT:    sub z0.d, z1.d, z0.d
+; CHECK-NEXT:    lsr z0.d, z0.d, #1
+; CHECK-NEXT:    ret
+entry:
+  %s0s = zext <vscale x 2 x i16> %s0 to <vscale x 2 x i32> ; widen first operand (zero-extend)
+  %s1s = zext <vscale x 2 x i16> %s1 to <vscale x 2 x i32> ; widen second operand (zero-extend)
+  %add = add <vscale x 2 x i32> %s0s, %s1s ; sum in the wide type
+  %add2 = add <vscale x 2 x i32> %add, shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 1, i32 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer) ; add a splat of 1 (rounding bias)
+  %s = lshr <vscale x 2 x i32> %add2, shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 1, i32 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer) ; shift right by a splat of 1
+  %result = trunc <vscale x 2 x i32> %s to <vscale x 2 x i16> ; narrow back to i16 elements
+  ret <vscale x 2 x i16> %result
+}
+
+define <vscale x 4 x i16> @rhadds_v4i16(<vscale x 4 x i16> %s0, <vscale x 4 x i16> %s1) { ; signed rounding halving add: trunc((sext(%s0) + sext(%s1) + 1) >> 1), computed in i32
+; CHECK-LABEL: rhadds_v4i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    mov z2.s, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    sxth z0.s, p0/m, z0.s
+; CHECK-NEXT:    sxth z1.s, p0/m, z1.s
+; CHECK-NEXT:    eor z0.d, z0.d, z2.d
+; CHECK-NEXT:    sub z0.s, z1.s, z0.s
+; CHECK-NEXT:    lsr z0.s, z0.s, #1
+; CHECK-NEXT:    ret
+entry:
+  %s0s = sext <vscale x 4 x i16> %s0 to <vscale x 4 x i32> ; widen first operand (sign-extend)
+  %s1s = sext <vscale x 4 x i16> %s1 to <vscale x 4 x i32> ; widen second operand (sign-extend)
+  %add = add <vscale x 4 x i32> %s0s, %s1s ; sum in the wide type
+  %add2 = add <vscale x 4 x i32> %add, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) ; add a splat of 1 (rounding bias)
+  %s = lshr <vscale x 4 x i32> %add2, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) ; logical shift by a splat of 1; the only bit where ashr would differ is dropped by the trunc
+  %result = trunc <vscale x 4 x i32> %s to <vscale x 4 x i16> ; narrow back to i16 elements
+  ret <vscale x 4 x i16> %result
+}
+
+define <vscale x 4 x i16> @rhaddu_v4i16(<vscale x 4 x i16> %s0, <vscale x 4 x i16> %s1) { ; unsigned rounding halving add: trunc((zext(%s0) + zext(%s1) + 1) >> 1), computed in i32
+; CHECK-LABEL: rhaddu_v4i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov z2.s, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    and z0.s, z0.s, #0xffff
+; CHECK-NEXT:    and z1.s, z1.s, #0xffff
+; CHECK-NEXT:    eor z0.d, z0.d, z2.d
+; CHECK-NEXT:    sub z0.s, z1.s, z0.s
+; CHECK-NEXT:    lsr z0.s, z0.s, #1
+; CHECK-NEXT:    ret
+entry:
+  %s0s = zext <vscale x 4 x i16> %s0 to <vscale x 4 x i32> ; widen first operand (zero-extend)
+  %s1s = zext <vscale x 4 x i16> %s1 to <vscale x 4 x i32> ; widen second operand (zero-extend)
+  %add = add <vscale x 4 x i32> %s0s, %s1s ; sum in the wide type
+  %add2 = add <vscale x 4 x i32> %add, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) ; add a splat of 1 (rounding bias)
+  %s = lshr <vscale x 4 x i32> %add2, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) ; shift right by a splat of 1
+  %result = trunc <vscale x 4 x i32> %s to <vscale x 4 x i16> ; narrow back to i16 elements
+  ret <vscale x 4 x i16> %result
+}
+
+define <vscale x 8 x i16> @rhadds_v8i16(<vscale x 8 x i16> %s0, <vscale x 8 x i16> %s1) { ; signed rounding halving add: trunc((sext(%s0) + sext(%s1) + 1) >> 1), computed in i32
+; CHECK-LABEL: rhadds_v8i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov z2.s, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    sunpkhi z3.s, z0.h
+; CHECK-NEXT:    sunpklo z0.s, z0.h
+; CHECK-NEXT:    sunpkhi z4.s, z1.h
+; CHECK-NEXT:    sunpklo z1.s, z1.h
+; CHECK-NEXT:    eor z0.d, z0.d, z2.d
+; CHECK-NEXT:    eor z2.d, z3.d, z2.d
+; CHECK-NEXT:    sub z0.s, z1.s, z0.s
+; CHECK-NEXT:    sub z1.s, z4.s, z2.s
+; CHECK-NEXT:    lsr z0.s, z0.s, #1
+; CHECK-NEXT:    lsr z1.s, z1.s, #1
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
+entry:
+  %s0s = sext <vscale x 8 x i16> %s0 to <vscale x 8 x i32> ; widen first operand (sign-extend)
+  %s1s = sext <vscale x 8 x i16> %s1 to <vscale x 8 x i32> ; widen second operand (sign-extend)
+  %add = add <vscale x 8 x i32> %s0s, %s1s ; sum in the wide type
+  %add2 = add <vscale x 8 x i32> %add, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 1, i32 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer) ; add a splat of 1 (rounding bias)
+  %s = lshr <vscale x 8 x i32> %add2, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 1, i32 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer) ; logical shift by a splat of 1; the only bit where ashr would differ is dropped by the trunc
+  %result = trunc <vscale x 8 x i32> %s to <vscale x 8 x i16> ; narrow back to i16 elements
+  ret <vscale x 8 x i16> %result
+}
+
+define <vscale x 8 x i16> @rhaddu_v8i16(<vscale x 8 x i16> %s0, <vscale x 8 x i16> %s1) { ; unsigned rounding halving add: trunc((zext(%s0) + zext(%s1) + 1) >> 1), computed in i32
+; CHECK-LABEL: rhaddu_v8i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov z2.s, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    uunpkhi z3.s, z0.h
+; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    uunpkhi z4.s, z1.h
+; CHECK-NEXT:    uunpklo z1.s, z1.h
+; CHECK-NEXT:    eor z0.d, z0.d, z2.d
+; CHECK-NEXT:    eor z2.d, z3.d, z2.d
+; CHECK-NEXT:    sub z0.s, z1.s, z0.s
+; CHECK-NEXT:    sub z1.s, z4.s, z2.s
+; CHECK-NEXT:    lsr z0.s, z0.s, #1
+; CHECK-NEXT:    lsr z1.s, z1.s, #1
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
+entry:
+  %s0s = zext <vscale x 8 x i16> %s0 to <vscale x 8 x i32> ; widen first operand (zero-extend)
+  %s1s = zext <vscale x 8 x i16> %s1 to <vscale x 8 x i32> ; widen second operand (zero-extend)
+  %add = add <vscale x 8 x i32> %s0s, %s1s ; sum in the wide type
+  %add2 = add <vscale x 8 x i32> %add, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 1, i32 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer) ; add a splat of 1 (rounding bias)
+  %s = lshr <vscale x 8 x i32> %add2, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 1, i32 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer) ; shift right by a splat of 1
+  %result = trunc <vscale x 8 x i32> %s to <vscale x 8 x i16> ; narrow back to i16 elements
+  ret <vscale x 8 x i16> %result
+}
+
+define <vscale x 4 x i8> @rhadds_v4i8(<vscale x 4 x i8> %s0, <vscale x 4 x i8> %s1) { ; signed rounding halving add: trunc((sext(%s0) + sext(%s1) + 1) >> 1), computed in i16
+; CHECK-LABEL: rhadds_v4i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    mov z2.s, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    sxtb z0.s, p0/m, z0.s
+; CHECK-NEXT:    sxtb z1.s, p0/m, z1.s
+; CHECK-NEXT:    eor z0.d, z0.d, z2.d
+; CHECK-NEXT:    sub z0.s, z1.s, z0.s
+; CHECK-NEXT:    and z0.s, z0.s, #0xffff
+; CHECK-NEXT:    lsr z0.s, z0.s, #1
+; CHECK-NEXT:    ret
+entry:
+  %s0s = sext <vscale x 4 x i8> %s0 to <vscale x 4 x i16> ; widen first operand (sign-extend)
+  %s1s = sext <vscale x 4 x i8> %s1 to <vscale x 4 x i16> ; widen second operand (sign-extend)
+  %add = add <vscale x 4 x i16> %s0s, %s1s ; sum in the wide type
+  %add2 = add <vscale x 4 x i16> %add, shufflevector (<vscale x 4 x i16> insertelement (<vscale x 4 x i16> poison, i16 1, i32 0), <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer) ; add a splat of 1 (rounding bias)
+  %s = lshr <vscale x 4 x i16> %add2, shufflevector (<vscale x 4 x i16> insertelement (<vscale x 4 x i16> poison, i16 1, i32 0), <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer) ; logical shift by a splat of 1; the only bit where ashr would differ is dropped by the trunc
+  %result = trunc <vscale x 4 x i16> %s to <vscale x 4 x i8> ; narrow back to i8 elements
+  ret <vscale x 4 x i8> %result
+}
+
+define <vscale x 4 x i8> @rhaddu_v4i8(<vscale x 4 x i8> %s0, <vscale x 4 x i8> %s1) { ; unsigned rounding halving add: trunc((zext(%s0) + zext(%s1) + 1) >> 1), computed in i16
+; CHECK-LABEL: rhaddu_v4i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov z2.s, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    and z0.s, z0.s, #0xff
+; CHECK-NEXT:    and z1.s, z1.s, #0xff
+; CHECK-NEXT:    eor z0.d, z0.d, z2.d
+; CHECK-NEXT:    sub z0.s, z1.s, z0.s
+; CHECK-NEXT:    lsr z0.s, z0.s, #1
+; CHECK-NEXT:    ret
+entry:
+  %s0s = zext <vscale x 4 x i8> %s0 to <vscale x 4 x i16> ; widen first operand (zero-extend)
+  %s1s = zext <vscale x 4 x i8> %s1 to <vscale x 4 x i16> ; widen second operand (zero-extend)
+  %add = add <vscale x 4 x i16> %s0s, %s1s ; sum in the wide type
+  %add2 = add <vscale x 4 x i16> %add, shufflevector (<vscale x 4 x i16> insertelement (<vscale x 4 x i16> poison, i16 1, i32 0), <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer) ; add a splat of 1 (rounding bias)
+  %s = lshr <vscale x 4 x i16> %add2, shufflevector (<vscale x 4 x i16> insertelement (<vscale x 4 x i16> poison, i16 1, i32 0), <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer) ; shift right by a splat of 1
+  %result = trunc <vscale x 4 x i16> %s to <vscale x 4 x i8> ; narrow back to i8 elements
+  ret <vscale x 4 x i8> %result
+}
+
+define <vscale x 8 x i8> @rhadds_v8i8(<vscale x 8 x i8> %s0, <vscale x 8 x i8> %s1) { ; signed rounding halving add: trunc((sext(%s0) + sext(%s1) + 1) >> 1), computed in i16
+; CHECK-LABEL: rhadds_v8i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    mov z2.h, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    sxtb z0.h, p0/m, z0.h
+; CHECK-NEXT:    sxtb z1.h, p0/m, z1.h
+; CHECK-NEXT:    eor z0.d, z0.d, z2.d
+; CHECK-NEXT:    sub z0.h, z1.h, z0.h
+; CHECK-NEXT:    lsr z0.h, z0.h, #1
+; CHECK-NEXT:    ret
+entry:
+  %s0s = sext <vscale x 8 x i8> %s0 to <vscale x 8 x i16> ; widen first operand (sign-extend)
+  %s1s = sext <vscale x 8 x i8> %s1 to <vscale x 8 x i16> ; widen second operand (sign-extend)
+  %add = add <vscale x 8 x i16> %s0s, %s1s ; sum in the wide type
+  %add2 = add <vscale x 8 x i16> %add, shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 1, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer) ; add a splat of 1 (rounding bias)
+  %s = lshr <vscale x 8 x i16> %add2, shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 1, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer) ; logical shift by a splat of 1; the only bit where ashr would differ is dropped by the trunc
+  %result = trunc <vscale x 8 x i16> %s to <vscale x 8 x i8> ; narrow back to i8 elements
+  ret <vscale x 8 x i8> %result
+}
+
+define <vscale x 8 x i8> @rhaddu_v8i8(<vscale x 8 x i8> %s0, <vscale x 8 x i8> %s1) { ; unsigned rounding halving add: trunc((zext(%s0) + zext(%s1) + 1) >> 1), computed in i16
+; CHECK-LABEL: rhaddu_v8i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov z2.h, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    and z0.h, z0.h, #0xff
+; CHECK-NEXT:    and z1.h, z1.h, #0xff
+; CHECK-NEXT:    eor z0.d, z0.d, z2.d
+; CHECK-NEXT:    sub z0.h, z1.h, z0.h
+; CHECK-NEXT:    lsr z0.h, z0.h, #1
+; CHECK-NEXT:    ret
+entry:
+  %s0s = zext <vscale x 8 x i8> %s0 to <vscale x 8 x i16> ; widen first operand (zero-extend)
+  %s1s = zext <vscale x 8 x i8> %s1 to <vscale x 8 x i16> ; widen second operand (zero-extend)
+  %add = add <vscale x 8 x i16> %s0s, %s1s ; sum in the wide type
+  %add2 = add <vscale x 8 x i16> %add, shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 1, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer) ; add a splat of 1 (rounding bias)
+  %s = lshr <vscale x 8 x i16> %add2, shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 1, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer) ; shift right by a splat of 1
+  %result = trunc <vscale x 8 x i16> %s to <vscale x 8 x i8> ; narrow back to i8 elements
+  ret <vscale x 8 x i8> %result
+}
+
+define <vscale x 16 x i8> @rhadds_v16i8(<vscale x 16 x i8> %s0, <vscale x 16 x i8> %s1) { ; signed rounding halving add: trunc((sext(%s0) + sext(%s1) + 1) >> 1), computed in i16
+; CHECK-LABEL: rhadds_v16i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov z2.h, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    sunpkhi z3.h, z0.b
+; CHECK-NEXT:    sunpklo z0.h, z0.b
+; CHECK-NEXT:    sunpkhi z4.h, z1.b
+; CHECK-NEXT:    sunpklo z1.h, z1.b
+; CHECK-NEXT:    eor z0.d, z0.d, z2.d
+; CHECK-NEXT:    eor z2.d, z3.d, z2.d
+; CHECK-NEXT:    sub z0.h, z1.h, z0.h
+; CHECK-NEXT:    sub z1.h, z4.h, z2.h
+; CHECK-NEXT:    lsr z0.h, z0.h, #1
+; CHECK-NEXT:    lsr z1.h, z1.h, #1
+; CHECK-NEXT:    uzp1 z0.b, z0.b, z1.b
+; CHECK-NEXT:    ret
+entry:
+  %s0s = sext <vscale x 16 x i8> %s0 to <vscale x 16 x i16> ; widen first operand (sign-extend)
+  %s1s = sext <vscale x 16 x i8> %s1 to <vscale x 16 x i16> ; widen second operand (sign-extend)
+  %add = add <vscale x 16 x i16> %s0s, %s1s ; sum in the wide type
+  %add2 = add <vscale x 16 x i16> %add, shufflevector (<vscale x 16 x i16> insertelement (<vscale x 16 x i16> poison, i16 1, i32 0), <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer) ; add a splat of 1 (rounding bias)
+  %s = lshr <vscale x 16 x i16> %add2, shufflevector (<vscale x 16 x i16> insertelement (<vscale x 16 x i16> poison, i16 1, i32 0), <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer) ; logical shift by a splat of 1; the only bit where ashr would differ is dropped by the trunc
+  %result = trunc <vscale x 16 x i16> %s to <vscale x 16 x i8> ; narrow back to i8 elements
+  ret <vscale x 16 x i8> %result
+}
+
+define <vscale x 16 x i8> @rhaddu_v16i8(<vscale x 16 x i8> %s0, <vscale x 16 x i8> %s1) { ; unsigned rounding halving add: trunc((zext(%s0) + zext(%s1) + 1) >> 1), computed in i16
+; CHECK-LABEL: rhaddu_v16i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov z2.h, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    uunpkhi z3.h, z0.b
+; CHECK-NEXT:    uunpklo z0.h, z0.b
+; CHECK-NEXT:    uunpkhi z4.h, z1.b
+; CHECK-NEXT:    uunpklo z1.h, z1.b
+; CHECK-NEXT:    eor z0.d, z0.d, z2.d
+; CHECK-NEXT:    eor z2.d, z3.d, z2.d
+; CHECK-NEXT:    sub z0.h, z1.h, z0.h
+; CHECK-NEXT:    sub z1.h, z4.h, z2.h
+; CHECK-NEXT:    lsr z0.h, z0.h, #1
+; CHECK-NEXT:    lsr z1.h, z1.h, #1
+; CHECK-NEXT:    uzp1 z0.b, z0.b, z1.b
+; CHECK-NEXT:    ret
+entry:
+  %s0s = zext <vscale x 16 x i8> %s0 to <vscale x 16 x i16> ; widen first operand (zero-extend)
+  %s1s = zext <vscale x 16 x i8> %s1 to <vscale x 16 x i16> ; widen second operand (zero-extend)
+  %add = add <vscale x 16 x i16> %s0s, %s1s ; sum in the wide type
+  %add2 = add <vscale x 16 x i16> %add, shufflevector (<vscale x 16 x i16> insertelement (<vscale x 16 x i16> poison, i16 1, i32 0), <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer) ; add a splat of 1 (rounding bias)
+  %s = lshr <vscale x 16 x i16> %add2, shufflevector (<vscale x 16 x i16> insertelement (<vscale x 16 x i16> poison, i16 1, i32 0), <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer) ; shift right by a splat of 1
+  %result = trunc <vscale x 16 x i16> %s to <vscale x 16 x i8> ; narrow back to i8 elements
+  ret <vscale x 16 x i8> %result
+}


        


More information about the llvm-commits mailing list