[llvm-branch-commits] [llvm] [AArch64][ISel] Select constructive SVE2 ext instruction (PR #151730)

Gaëtan Bossu via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Fri Aug 1 09:56:35 PDT 2025


https://github.com/gbossu created https://github.com/llvm/llvm-project/pull/151730

👉 This is a chained PR. Predecessor is https://github.com/llvm/llvm-project/pull/151729

This patch adds patterns for selecting EXT_ZZI_B, the constructive SVE2 ext instruction.

They are tested for fixed vectors using extract shuffles, and for
scalable vectors using llvm.vector.splice intrinsics.

Codegen will improve once subreg liveness is enabled. Without it,
any use of a zpr2 tuple is always considered a use of both zpr registers
of the pair.

>From f573d2e983e34a2f99a37976d4956e7aa7c62acd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ga=C3=ABtan=20Bossu?= <gaetan.bossu at arm.com>
Date: Fri, 1 Aug 2025 11:32:58 +0000
Subject: [PATCH] [AArch64][ISel] Select constructive SVE2 ext instruction

This patch adds patterns for selecting EXT_ZZI_B, the constructive SVE2 ext instruction.

They are tested for fixed vectors using extract shuffles, and for
scalable vectors using llvm.vector.splice intrinsics.

Codegen will improve once subreg liveness is enabled. Without it,
any use of a zpr2 tuple is always considered a use of both zpr registers
of the pair.
---
 .../lib/Target/AArch64/AArch64SVEInstrInfo.td |   16 +
 .../AArch64/get-active-lane-mask-extract.ll   |   10 +-
 .../sve-fixed-length-partial-reduce.ll        |   91 +-
 llvm/test/CodeGen/AArch64/sve-pr92779.ll      |   17 +-
 ...e-streaming-mode-fixed-length-ext-loads.ll |   24 +-
 ...ing-mode-fixed-length-extract-subvector.ll |   32 +-
 ...aming-mode-fixed-length-fp-extend-trunc.ll |   32 +-
 ...sve-streaming-mode-fixed-length-int-div.ll |  504 ++---
 ...streaming-mode-fixed-length-int-extends.ll | 1826 +++++++++++------
 ...sve-streaming-mode-fixed-length-int-rem.ll |  624 +++---
 ...reaming-mode-fixed-length-limit-duplane.ll |   22 +-
 ...-streaming-mode-fixed-length-reductions.ll |   96 +-
 .../test/CodeGen/AArch64/sve-vector-splice.ll |  253 +++
 .../sve2-fixed-length-extract-subvector.ll    |   79 +-
 14 files changed, 2236 insertions(+), 1390 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/sve-vector-splice.ll

diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 0c4b4f4c3ed88..201dd93302d7a 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -4069,6 +4069,22 @@ let Predicates = [HasSVE2_or_SME] in {
   let AddedComplexity = 2 in {
     def : Pat<(nxv16i8 (AArch64ext nxv16i8:$zn1, nxv16i8:$zn2, (i32 imm0_255:$imm))),
               (EXT_ZZI_B (REG_SEQUENCE ZPR2, $zn1, zsub0, $zn2, zsub1), imm0_255:$imm)>;
+
+    foreach VT = [nxv16i8] in
+      def : Pat<(VT (vector_splice VT:$Z1, VT:$Z2, (i64 (sve_ext_imm_0_255 i32:$index)))),
+                (EXT_ZZI_B  (REG_SEQUENCE ZPR2, $Z1, zsub0, $Z2, zsub1), imm0_255:$index)>;
+
+    foreach VT = [nxv8i16, nxv8f16, nxv8bf16] in
+      def : Pat<(VT (vector_splice VT:$Z1, VT:$Z2, (i64 (sve_ext_imm_0_127 i32:$index)))),
+                (EXT_ZZI_B  (REG_SEQUENCE ZPR2, $Z1, zsub0, $Z2, zsub1), imm0_255:$index)>;
+
+    foreach VT = [nxv4i32, nxv4f16, nxv4f32, nxv4bf16] in
+      def : Pat<(VT (vector_splice VT:$Z1, VT:$Z2, (i64 (sve_ext_imm_0_63 i32:$index)))),
+                (EXT_ZZI_B  (REG_SEQUENCE ZPR2, $Z1, zsub0, $Z2, zsub1), imm0_255:$index)>;
+
+    foreach VT = [nxv2i64, nxv2f16, nxv2f32, nxv2f64, nxv2bf16] in
+      def : Pat<(VT (vector_splice VT:$Z1, VT:$Z2, (i64 (sve_ext_imm_0_31 i32:$index)))),
+                (EXT_ZZI_B  (REG_SEQUENCE ZPR2, $Z1, zsub0, $Z2, zsub1), imm0_255:$index)>;
   }
 } // End HasSVE2_or_SME
 
diff --git a/llvm/test/CodeGen/AArch64/get-active-lane-mask-extract.ll b/llvm/test/CodeGen/AArch64/get-active-lane-mask-extract.ll
index 50975d16c7e9e..13bec605839a9 100644
--- a/llvm/test/CodeGen/AArch64/get-active-lane-mask-extract.ll
+++ b/llvm/test/CodeGen/AArch64/get-active-lane-mask-extract.ll
@@ -192,7 +192,7 @@ define void @test_fixed_extract(i64 %i, i64 %n) #0 {
 ; CHECK-SVE2p1-NEXT:    mov z1.s, p0/z, #1 // =0x1
 ; CHECK-SVE2p1-NEXT:    fmov s0, w8
 ; CHECK-SVE2p1-NEXT:    mov v0.s[1], v1.s[1]
-; CHECK-SVE2p1-NEXT:    ext z1.b, z1.b, z0.b, #8
+; CHECK-SVE2p1-NEXT:    ext z1.b, { z1.b, z2.b }, #8
 ; CHECK-SVE2p1-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SVE2p1-NEXT:    // kill: def $d1 killed $d1 killed $z1
 ; CHECK-SVE2p1-NEXT:    b use
@@ -202,12 +202,12 @@ define void @test_fixed_extract(i64 %i, i64 %n) #0 {
 ; CHECK-SME2-NEXT:    whilelo p0.s, x0, x1
 ; CHECK-SME2-NEXT:    cset w8, mi
 ; CHECK-SME2-NEXT:    mov z1.s, p0/z, #1 // =0x1
-; CHECK-SME2-NEXT:    fmov s2, w8
+; CHECK-SME2-NEXT:    fmov s3, w8
 ; CHECK-SME2-NEXT:    mov z0.s, z1.s[1]
-; CHECK-SME2-NEXT:    zip1 z0.s, z2.s, z0.s
-; CHECK-SME2-NEXT:    ext z1.b, z1.b, z0.b, #8
-; CHECK-SME2-NEXT:    // kill: def $d0 killed $d0 killed $z0
+; CHECK-SME2-NEXT:    ext z1.b, { z1.b, z2.b }, #8
 ; CHECK-SME2-NEXT:    // kill: def $d1 killed $d1 killed $z1
+; CHECK-SME2-NEXT:    zip1 z0.s, z3.s, z0.s
+; CHECK-SME2-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-SME2-NEXT:    b use
     %r = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 %i, i64 %n)
     %v0 = call <2 x i1> @llvm.vector.extract.v2i1.nxv4i1.i64(<vscale x 4 x i1> %r, i64 0)
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-partial-reduce.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-partial-reduce.ll
index 33d5ac4cd299e..3e8b3a40467dd 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-partial-reduce.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-partial-reduce.ll
@@ -109,14 +109,13 @@ define <16 x i16> @two_way_i8_i16_vl256(ptr %accptr, ptr %uptr, ptr %sptr) vscal
 ; SME-LABEL: two_way_i8_i16_vl256:
 ; SME:       // %bb.0:
 ; SME-NEXT:    ldr z0, [x0]
-; SME-NEXT:    ldr z1, [x1]
-; SME-NEXT:    ldr z2, [x2]
-; SME-NEXT:    umlalb z0.h, z2.b, z1.b
-; SME-NEXT:    umlalt z0.h, z2.b, z1.b
-; SME-NEXT:    mov z1.d, z0.d
-; SME-NEXT:    ext z1.b, z1.b, z0.b, #16
-; SME-NEXT:    // kill: def $q0 killed $q0 killed $z0
-; SME-NEXT:    // kill: def $q1 killed $q1 killed $z1
+; SME-NEXT:    ldr z2, [x1]
+; SME-NEXT:    ldr z3, [x2]
+; SME-NEXT:    umlalb z0.h, z3.b, z2.b
+; SME-NEXT:    umlalt z0.h, z3.b, z2.b
+; SME-NEXT:    ext z2.b, { z0.b, z1.b }, #16
+; SME-NEXT:    // kill: def $q0 killed $q0 killed $z0_z1
+; SME-NEXT:    mov z1.d, z2.d
 ; SME-NEXT:    ret
   %acc = load <16 x i16>, ptr %accptr
   %u = load <32 x i8>, ptr %uptr
@@ -232,14 +231,13 @@ define <8 x i32> @two_way_i16_i32_vl256(ptr %accptr, ptr %uptr, ptr %sptr) vscal
 ; SME-LABEL: two_way_i16_i32_vl256:
 ; SME:       // %bb.0:
 ; SME-NEXT:    ldr z0, [x0]
-; SME-NEXT:    ldr z1, [x1]
-; SME-NEXT:    ldr z2, [x2]
-; SME-NEXT:    umlalb z0.s, z2.h, z1.h
-; SME-NEXT:    umlalt z0.s, z2.h, z1.h
-; SME-NEXT:    mov z1.d, z0.d
-; SME-NEXT:    ext z1.b, z1.b, z0.b, #16
-; SME-NEXT:    // kill: def $q0 killed $q0 killed $z0
-; SME-NEXT:    // kill: def $q1 killed $q1 killed $z1
+; SME-NEXT:    ldr z2, [x1]
+; SME-NEXT:    ldr z3, [x2]
+; SME-NEXT:    umlalb z0.s, z3.h, z2.h
+; SME-NEXT:    umlalt z0.s, z3.h, z2.h
+; SME-NEXT:    ext z2.b, { z0.b, z1.b }, #16
+; SME-NEXT:    // kill: def $q0 killed $q0 killed $z0_z1
+; SME-NEXT:    mov z1.d, z2.d
 ; SME-NEXT:    ret
   %acc = load <8 x i32>, ptr %accptr
   %u = load <16 x i16>, ptr %uptr
@@ -355,14 +353,13 @@ define <4 x i64> @two_way_i32_i64_vl256(ptr %accptr, ptr %uptr, ptr %sptr) vscal
 ; SME-LABEL: two_way_i32_i64_vl256:
 ; SME:       // %bb.0:
 ; SME-NEXT:    ldr z0, [x0]
-; SME-NEXT:    ldr z1, [x1]
-; SME-NEXT:    ldr z2, [x2]
-; SME-NEXT:    umlalb z0.d, z2.s, z1.s
-; SME-NEXT:    umlalt z0.d, z2.s, z1.s
-; SME-NEXT:    mov z1.d, z0.d
-; SME-NEXT:    ext z1.b, z1.b, z0.b, #16
-; SME-NEXT:    // kill: def $q0 killed $q0 killed $z0
-; SME-NEXT:    // kill: def $q1 killed $q1 killed $z1
+; SME-NEXT:    ldr z2, [x1]
+; SME-NEXT:    ldr z3, [x2]
+; SME-NEXT:    umlalb z0.d, z3.s, z2.s
+; SME-NEXT:    umlalt z0.d, z3.s, z2.s
+; SME-NEXT:    ext z2.b, { z0.b, z1.b }, #16
+; SME-NEXT:    // kill: def $q0 killed $q0 killed $z0_z1
+; SME-NEXT:    mov z1.d, z2.d
 ; SME-NEXT:    ret
   %acc = load <4 x i64>, ptr %accptr
   %u = load <8 x i32>, ptr %uptr
@@ -644,13 +641,12 @@ define <8 x i32> @four_way_i8_i32_vl256(ptr %accptr, ptr %uptr, ptr %sptr) vscal
 ; SME-LABEL: four_way_i8_i32_vl256:
 ; SME:       // %bb.0:
 ; SME-NEXT:    ldr z0, [x0]
-; SME-NEXT:    ldr z1, [x1]
-; SME-NEXT:    ldr z2, [x2]
-; SME-NEXT:    udot z0.s, z2.b, z1.b
-; SME-NEXT:    mov z1.d, z0.d
-; SME-NEXT:    ext z1.b, z1.b, z0.b, #16
-; SME-NEXT:    // kill: def $q0 killed $q0 killed $z0
-; SME-NEXT:    // kill: def $q1 killed $q1 killed $z1
+; SME-NEXT:    ldr z2, [x1]
+; SME-NEXT:    ldr z3, [x2]
+; SME-NEXT:    udot z0.s, z3.b, z2.b
+; SME-NEXT:    ext z2.b, { z0.b, z1.b }, #16
+; SME-NEXT:    // kill: def $q0 killed $q0 killed $z0_z1
+; SME-NEXT:    mov z1.d, z2.d
 ; SME-NEXT:    ret
   %acc = load <8 x i32>, ptr %accptr
   %u = load <32 x i8>, ptr %uptr
@@ -689,13 +685,12 @@ define <8 x i32> @four_way_i8_i32_vl256_usdot(ptr %accptr, ptr %uptr, ptr %sptr)
 ; SME-LABEL: four_way_i8_i32_vl256_usdot:
 ; SME:       // %bb.0:
 ; SME-NEXT:    ldr z0, [x0]
-; SME-NEXT:    ldr z1, [x1]
-; SME-NEXT:    ldr z2, [x2]
-; SME-NEXT:    usdot z0.s, z1.b, z2.b
-; SME-NEXT:    mov z1.d, z0.d
-; SME-NEXT:    ext z1.b, z1.b, z0.b, #16
-; SME-NEXT:    // kill: def $q0 killed $q0 killed $z0
-; SME-NEXT:    // kill: def $q1 killed $q1 killed $z1
+; SME-NEXT:    ldr z2, [x1]
+; SME-NEXT:    ldr z3, [x2]
+; SME-NEXT:    usdot z0.s, z2.b, z3.b
+; SME-NEXT:    ext z2.b, { z0.b, z1.b }, #16
+; SME-NEXT:    // kill: def $q0 killed $q0 killed $z0_z1
+; SME-NEXT:    mov z1.d, z2.d
 ; SME-NEXT:    ret
   %acc = load <8 x i32>, ptr %accptr
   %u = load <32 x i8>, ptr %uptr
@@ -822,13 +817,12 @@ define <4 x i64> @four_way_i16_i64_vl256(ptr %accptr, ptr %uptr, ptr %sptr) vsca
 ; SME-LABEL: four_way_i16_i64_vl256:
 ; SME:       // %bb.0:
 ; SME-NEXT:    ldr z0, [x0]
-; SME-NEXT:    ldr z1, [x1]
-; SME-NEXT:    ldr z2, [x2]
-; SME-NEXT:    udot z0.d, z2.h, z1.h
-; SME-NEXT:    mov z1.d, z0.d
-; SME-NEXT:    ext z1.b, z1.b, z0.b, #16
-; SME-NEXT:    // kill: def $q0 killed $q0 killed $z0
-; SME-NEXT:    // kill: def $q1 killed $q1 killed $z1
+; SME-NEXT:    ldr z2, [x1]
+; SME-NEXT:    ldr z3, [x2]
+; SME-NEXT:    udot z0.d, z3.h, z2.h
+; SME-NEXT:    ext z2.b, { z0.b, z1.b }, #16
+; SME-NEXT:    // kill: def $q0 killed $q0 killed $z0_z1
+; SME-NEXT:    mov z1.d, z2.d
 ; SME-NEXT:    ret
   %acc = load <4 x i64>, ptr %accptr
   %u = load <16 x i16>, ptr %uptr
@@ -999,10 +993,9 @@ define <4 x i64> @four_way_i8_i64_vl256(ptr %accptr, ptr %uptr, ptr %sptr) vscal
 ; SME-NEXT:    ldr z0, [x0]
 ; SME-NEXT:    uaddwb z0.d, z0.d, z2.s
 ; SME-NEXT:    uaddwt z0.d, z0.d, z2.s
-; SME-NEXT:    mov z1.d, z0.d
-; SME-NEXT:    ext z1.b, z1.b, z0.b, #16
-; SME-NEXT:    // kill: def $q0 killed $q0 killed $z0
-; SME-NEXT:    // kill: def $q1 killed $q1 killed $z1
+; SME-NEXT:    ext z2.b, { z0.b, z1.b }, #16
+; SME-NEXT:    // kill: def $q0 killed $q0 killed $z0_z1
+; SME-NEXT:    mov z1.d, z2.d
 ; SME-NEXT:    ret
   %acc = load <4 x i64>, ptr %accptr
   %u = load <32 x i8>, ptr %uptr
diff --git a/llvm/test/CodeGen/AArch64/sve-pr92779.ll b/llvm/test/CodeGen/AArch64/sve-pr92779.ll
index 3f34d79b3bb49..427d3903cf2e9 100644
--- a/llvm/test/CodeGen/AArch64/sve-pr92779.ll
+++ b/llvm/test/CodeGen/AArch64/sve-pr92779.ll
@@ -5,16 +5,15 @@ define void @main(ptr %0) {
 ; CHECK-LABEL: main:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    movi v0.2d, #0000000000000000
-; CHECK-NEXT:    movi v1.2d, #0000000000000000
 ; CHECK-NEXT:    ptrue p0.d, vl1
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    uzp1 v0.2s, v1.2s, v0.2s
-; CHECK-NEXT:    neg v0.2s, v0.2s
-; CHECK-NEXT:    smov x8, v0.s[0]
-; CHECK-NEXT:    smov x9, v0.s[1]
-; CHECK-NEXT:    mov z1.d, p0/m, x8
-; CHECK-NEXT:    mov z1.d, p0/m, x9
-; CHECK-NEXT:    str z1, [x0]
+; CHECK-NEXT:    ext z2.b, { z0.b, z1.b }, #8
+; CHECK-NEXT:    uzp1 v2.2s, v0.2s, v2.2s
+; CHECK-NEXT:    neg v2.2s, v2.2s
+; CHECK-NEXT:    smov x8, v2.s[0]
+; CHECK-NEXT:    smov x9, v2.s[1]
+; CHECK-NEXT:    mov z0.d, p0/m, x8
+; CHECK-NEXT:    mov z0.d, p0/m, x9
+; CHECK-NEXT:    str z0, [x0]
 ; CHECK-NEXT:    ret
 "entry":
   %1 = bitcast <vscale x 2 x i64> zeroinitializer to <vscale x 4 x i32>
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll
index 4d524bc848de6..6fe6b8a1c48d0 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
+; RUN: llc -mattr=+sve2 -force-streaming-compatible  < %s | FileCheck %s
 ; RUN: llc -mattr=+sme -force-streaming  < %s | FileCheck %s
 ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
@@ -228,25 +228,25 @@ define <4 x i256> @load_sext_v4i32i256(ptr %ap) {
 ; CHECK-LABEL: load_sext_v4i32i256:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldr q0, [x0]
-; CHECK-NEXT:    sunpklo z1.d, z0.s
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT:    sunpklo z2.d, z0.s
+; CHECK-NEXT:    ext z0.b, { z0.b, z1.b }, #8
 ; CHECK-NEXT:    sunpklo z0.d, z0.s
-; CHECK-NEXT:    fmov x9, d1
-; CHECK-NEXT:    mov z1.d, z1.d[1]
-; CHECK-NEXT:    fmov x11, d0
-; CHECK-NEXT:    mov z0.d, z0.d[1]
+; CHECK-NEXT:    fmov x9, d2
+; CHECK-NEXT:    mov z2.d, z2.d[1]
 ; CHECK-NEXT:    asr x10, x9, #63
+; CHECK-NEXT:    fmov x11, d2
 ; CHECK-NEXT:    stp x9, x10, [x8]
-; CHECK-NEXT:    fmov x9, d1
+; CHECK-NEXT:    fmov x9, d0
+; CHECK-NEXT:    mov z0.d, z0.d[1]
 ; CHECK-NEXT:    asr x12, x11, #63
 ; CHECK-NEXT:    stp x10, x10, [x8, #16]
-; CHECK-NEXT:    stp x11, x12, [x8, #64]
+; CHECK-NEXT:    stp x11, x12, [x8, #32]
 ; CHECK-NEXT:    fmov x11, d0
 ; CHECK-NEXT:    asr x10, x9, #63
-; CHECK-NEXT:    stp x12, x12, [x8, #80]
-; CHECK-NEXT:    stp x10, x10, [x8, #48]
+; CHECK-NEXT:    stp x12, x12, [x8, #48]
+; CHECK-NEXT:    stp x10, x10, [x8, #80]
 ; CHECK-NEXT:    asr x12, x11, #63
-; CHECK-NEXT:    stp x9, x10, [x8, #32]
+; CHECK-NEXT:    stp x9, x10, [x8, #64]
 ; CHECK-NEXT:    stp x12, x12, [x8, #112]
 ; CHECK-NEXT:    stp x11, x12, [x8, #96]
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll
index 35dd827bbabc5..7ef35f153f029 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
+; RUN: llc -mattr=+sve2 -force-streaming-compatible  < %s | FileCheck %s
 ; RUN: llc -mattr=+sme -force-streaming  < %s | FileCheck %s
 ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
@@ -78,8 +78,8 @@ define <4 x i8> @extract_subvector_v8i8(<8 x i8> %op) {
 define <8 x i8> @extract_subvector_v16i8(<16 x i8> %op) {
 ; CHECK-LABEL: extract_subvector_v16i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0_z1
+; CHECK-NEXT:    ext z0.b, { z0.b, z1.b }, #8
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
@@ -119,7 +119,7 @@ define <2 x i16> @extract_subvector_v4i16(<4 x i16> %op) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    uunpklo z0.s, z0.h
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT:    ext z0.b, { z0.b, z1.b }, #8
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
@@ -138,8 +138,8 @@ define <2 x i16> @extract_subvector_v4i16(<4 x i16> %op) {
 define <4 x i16> @extract_subvector_v8i16(<8 x i16> %op) {
 ; CHECK-LABEL: extract_subvector_v8i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0_z1
+; CHECK-NEXT:    ext z0.b, { z0.b, z1.b }, #8
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
@@ -198,8 +198,8 @@ define <1 x i32> @extract_subvector_v2i32(<2 x i32> %op) {
 define <2 x i32> @extract_subvector_v4i32(<4 x i32> %op) {
 ; CHECK-LABEL: extract_subvector_v4i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0_z1
+; CHECK-NEXT:    ext z0.b, { z0.b, z1.b }, #8
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
@@ -237,8 +237,8 @@ define void @extract_subvector_v8i32(ptr %a, ptr %b) {
 define <1 x i64> @extract_subvector_v2i64(<2 x i64> %op) {
 ; CHECK-LABEL: extract_subvector_v2i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0_z1
+; CHECK-NEXT:    ext z0.b, { z0.b, z1.b }, #8
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
@@ -297,8 +297,8 @@ define <2 x half> @extract_subvector_v4f16(<4 x half> %op) {
 define <4 x half> @extract_subvector_v8f16(<8 x half> %op) {
 ; CHECK-LABEL: extract_subvector_v8f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0_z1
+; CHECK-NEXT:    ext z0.b, { z0.b, z1.b }, #8
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
@@ -357,8 +357,8 @@ define <1 x float> @extract_subvector_v2f32(<2 x float> %op) {
 define <2 x float> @extract_subvector_v4f32(<4 x float> %op) {
 ; CHECK-LABEL: extract_subvector_v4f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0_z1
+; CHECK-NEXT:    ext z0.b, { z0.b, z1.b }, #8
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
@@ -396,8 +396,8 @@ define void @extract_subvector_v8f32(ptr %a, ptr %b) {
 define <1 x double> @extract_subvector_v2f64(<2 x double> %op) {
 ; CHECK-LABEL: extract_subvector_v2f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0_z1
+; CHECK-NEXT:    ext z0.b, { z0.b, z1.b }, #8
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll
index e3d0a72c74b87..bc9b0373d8e49 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
+; RUN: llc -mattr=+sve2 -force-streaming-compatible  < %s | FileCheck %s
 ; RUN: llc -mattr=+sme -force-streaming  < %s | FileCheck %s
 ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
@@ -74,14 +74,14 @@ define void @fcvt_v4f16_to_v4f32(<4 x half> %a, ptr %b) {
 define void @fcvt_v8f16_to_v8f32(<8 x half> %a, ptr %b) {
 ; CHECK-LABEL: fcvt_v8f16_to_v8f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    uunpklo z1.s, z0.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0_z1
+; CHECK-NEXT:    ext z2.b, { z0.b, z1.b }, #8
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
 ; CHECK-NEXT:    uunpklo z0.s, z0.h
-; CHECK-NEXT:    fcvt z1.s, p0/m, z1.h
+; CHECK-NEXT:    uunpklo z1.s, z2.h
 ; CHECK-NEXT:    fcvt z0.s, p0/m, z0.h
-; CHECK-NEXT:    stp q1, q0, [x0]
+; CHECK-NEXT:    fcvt z1.s, p0/m, z1.h
+; CHECK-NEXT:    stp q0, q1, [x0]
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvt_v8f16_to_v8f32:
@@ -122,21 +122,21 @@ define void @fcvt_v8f16_to_v8f32(<8 x half> %a, ptr %b) {
 define void @fcvt_v16f16_to_v16f32(<16 x half> %a, ptr %b) {
 ; CHECK-LABEL: fcvt_v16f16_to_v16f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    uunpklo z2.s, z1.h
-; CHECK-NEXT:    uunpklo z3.s, z0.h
+; CHECK-NEXT:    mov z3.d, z0.d
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1_z2
+; CHECK-NEXT:    ext z0.b, { z1.b, z2.b }, #8
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
 ; CHECK-NEXT:    uunpklo z1.s, z1.h
+; CHECK-NEXT:    ext z5.b, { z3.b, z4.b }, #8
 ; CHECK-NEXT:    uunpklo z0.s, z0.h
-; CHECK-NEXT:    fcvt z2.s, p0/m, z2.h
-; CHECK-NEXT:    fcvt z3.s, p0/m, z3.h
+; CHECK-NEXT:    uunpklo z2.s, z3.h
 ; CHECK-NEXT:    fcvt z1.s, p0/m, z1.h
+; CHECK-NEXT:    uunpklo z3.s, z5.h
 ; CHECK-NEXT:    fcvt z0.s, p0/m, z0.h
-; CHECK-NEXT:    stp q3, q0, [x0]
-; CHECK-NEXT:    stp q2, q1, [x0, #32]
+; CHECK-NEXT:    fcvt z2.s, p0/m, z2.h
+; CHECK-NEXT:    fcvt z3.s, p0/m, z3.h
+; CHECK-NEXT:    stp q1, q0, [x0, #32]
+; CHECK-NEXT:    stp q2, q3, [x0]
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvt_v16f16_to_v16f32:
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll
index ae7c676172867..0e34b2cd09fe1 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll
@@ -58,21 +58,21 @@ define <8 x i8> @sdiv_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    sunpklo z3.h, z0.b
 ; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    sunpklo z1.h, z1.b
-; CHECK-NEXT:    sunpklo z0.h, z0.b
-; CHECK-NEXT:    sunpklo z2.s, z1.h
-; CHECK-NEXT:    sunpklo z3.s, z0.h
-; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT:    sunpklo z5.s, z3.h
+; CHECK-NEXT:    sunpklo z0.s, z1.h
+; CHECK-NEXT:    ext z1.b, { z1.b, z2.b }, #8
+; CHECK-NEXT:    ext z2.b, { z3.b, z4.b }, #8
 ; CHECK-NEXT:    sunpklo z1.s, z1.h
-; CHECK-NEXT:    sunpklo z0.s, z0.h
-; CHECK-NEXT:    sdivr z2.s, p0/m, z2.s, z3.s
-; CHECK-NEXT:    sdiv z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    sunpklo z2.s, z2.h
+; CHECK-NEXT:    sdivr z0.s, p0/m, z0.s, z5.s
+; CHECK-NEXT:    sdivr z1.s, p0/m, z1.s, z2.s
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z1.h, z2.h, z2.h
 ; CHECK-NEXT:    uzp1 z2.h, z0.h, z0.h
-; CHECK-NEXT:    splice z0.h, p0, { z1.h, z2.h }
+; CHECK-NEXT:    uzp1 z3.h, z1.h, z1.h
+; CHECK-NEXT:    splice z0.h, p0, { z2.h, z3.h }
 ; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
@@ -124,40 +124,40 @@ define <8 x i8> @sdiv_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 define <16 x i8> @sdiv_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; CHECK-LABEL: sdiv_v16i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    sunpklo z2.h, z1.b
-; CHECK-NEXT:    sunpklo z3.h, z0.b
+; CHECK-NEXT:    mov z3.d, z0.d
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1_z2
+; CHECK-NEXT:    sunpklo z5.h, z1.b
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT:    sunpklo z4.s, z2.h
-; CHECK-NEXT:    sunpklo z5.s, z3.h
-; CHECK-NEXT:    ext z2.b, z2.b, z0.b, #8
-; CHECK-NEXT:    ext z3.b, z3.b, z0.b, #8
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    sunpklo z1.h, z1.b
-; CHECK-NEXT:    sunpklo z2.s, z2.h
-; CHECK-NEXT:    sunpklo z3.s, z3.h
-; CHECK-NEXT:    sunpklo z0.h, z0.b
-; CHECK-NEXT:    sdivr z4.s, p0/m, z4.s, z5.s
-; CHECK-NEXT:    sunpklo z5.s, z0.h
-; CHECK-NEXT:    sdivr z2.s, p0/m, z2.s, z3.s
-; CHECK-NEXT:    sunpklo z3.s, z1.h
-; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT:    ext z1.b, { z1.b, z2.b }, #8
+; CHECK-NEXT:    sunpklo z16.h, z3.b
+; CHECK-NEXT:    ext z2.b, { z3.b, z4.b }, #8
+; CHECK-NEXT:    sunpklo z0.s, z5.h
+; CHECK-NEXT:    ext z5.b, { z5.b, z6.b }, #8
+; CHECK-NEXT:    sunpklo z3.h, z1.b
+; CHECK-NEXT:    sunpklo z1.h, z2.b
+; CHECK-NEXT:    ext z6.b, { z16.b, z17.b }, #8
+; CHECK-NEXT:    sunpklo z7.s, z16.h
+; CHECK-NEXT:    sunpklo z5.s, z5.h
+; CHECK-NEXT:    sunpklo z6.s, z6.h
+; CHECK-NEXT:    sdivr z0.s, p0/m, z0.s, z7.s
+; CHECK-NEXT:    sunpklo z7.s, z1.h
+; CHECK-NEXT:    ext z1.b, { z1.b, z2.b }, #8
 ; CHECK-NEXT:    sunpklo z1.s, z1.h
-; CHECK-NEXT:    sunpklo z0.s, z0.h
-; CHECK-NEXT:    sdivr z3.s, p0/m, z3.s, z5.s
-; CHECK-NEXT:    uzp1 z4.h, z4.h, z4.h
-; CHECK-NEXT:    uzp1 z5.h, z2.h, z2.h
-; CHECK-NEXT:    sdiv z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z1.h, z3.h, z3.h
+; CHECK-NEXT:    sdivr z5.s, p0/m, z5.s, z6.s
+; CHECK-NEXT:    sunpklo z6.s, z3.h
+; CHECK-NEXT:    ext z3.b, { z3.b, z4.b }, #8
+; CHECK-NEXT:    sunpklo z2.s, z3.h
+; CHECK-NEXT:    sdivr z6.s, p0/m, z6.s, z7.s
+; CHECK-NEXT:    sdiv z1.s, p0/m, z1.s, z2.s
 ; CHECK-NEXT:    uzp1 z2.h, z0.h, z0.h
-; CHECK-NEXT:    splice z0.h, p0, { z4.h, z5.h }
-; CHECK-NEXT:    splice z1.h, p0, { z1.h, z2.h }
-; CHECK-NEXT:    ptrue p0.b, vl8
+; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    uzp1 z3.h, z5.h, z5.h
+; CHECK-NEXT:    uzp1 z4.h, z6.h, z6.h
+; CHECK-NEXT:    splice z0.h, p0, { z2.h, z3.h }
 ; CHECK-NEXT:    uzp1 z2.b, z0.b, z0.b
+; CHECK-NEXT:    uzp1 z5.h, z1.h, z1.h
+; CHECK-NEXT:    splice z1.h, p0, { z4.h, z5.h }
+; CHECK-NEXT:    ptrue p0.b, vl8
 ; CHECK-NEXT:    uzp1 z3.b, z1.b, z1.b
 ; CHECK-NEXT:    splice z0.b, p0, { z2.b, z3.b }
 ; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
@@ -241,73 +241,73 @@ define <16 x i8> @sdiv_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 define void @sdiv_v32i8(ptr %a, ptr %b) {
 ; CHECK-LABEL: sdiv_v32i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldp q6, q3, [x1]
+; CHECK-NEXT:    ldp q18, q4, [x1]
 ; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    ldr q2, [x0, #16]
-; CHECK-NEXT:    sunpklo z1.h, z3.b
-; CHECK-NEXT:    sunpklo z4.h, z2.b
-; CHECK-NEXT:    sunpklo z7.h, z6.b
-; CHECK-NEXT:    sunpklo z0.s, z1.h
-; CHECK-NEXT:    sunpklo z5.s, z4.h
-; CHECK-NEXT:    sunpklo z17.s, z7.h
-; CHECK-NEXT:    sdivr z0.s, p0/m, z0.s, z5.s
-; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT:    ext z4.b, z4.b, z0.b, #8
-; CHECK-NEXT:    ext z3.b, z3.b, z0.b, #8
-; CHECK-NEXT:    ext z2.b, z2.b, z0.b, #8
-; CHECK-NEXT:    ext z7.b, z7.b, z0.b, #8
-; CHECK-NEXT:    ext z6.b, z6.b, z0.b, #8
+; CHECK-NEXT:    sunpklo z6.h, z4.b
+; CHECK-NEXT:    sunpklo z16.h, z2.b
+; CHECK-NEXT:    ext z4.b, { z4.b, z5.b }, #8
+; CHECK-NEXT:    ext z2.b, { z2.b, z3.b }, #8
+; CHECK-NEXT:    sunpklo z20.h, z18.b
+; CHECK-NEXT:    ext z18.b, { z18.b, z19.b }, #8
+; CHECK-NEXT:    sunpklo z3.h, z4.b
+; CHECK-NEXT:    sunpklo z0.s, z6.h
+; CHECK-NEXT:    sunpklo z1.s, z16.h
+; CHECK-NEXT:    sdivr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ext z1.b, { z6.b, z7.b }, #8
+; CHECK-NEXT:    ext z6.b, { z16.b, z17.b }, #8
+; CHECK-NEXT:    ldr q16, [x0]
 ; CHECK-NEXT:    sunpklo z1.s, z1.h
-; CHECK-NEXT:    sunpklo z4.s, z4.h
-; CHECK-NEXT:    sunpklo z3.h, z3.b
-; CHECK-NEXT:    sunpklo z7.s, z7.h
-; CHECK-NEXT:    sunpklo z6.h, z6.b
-; CHECK-NEXT:    sdivr z1.s, p0/m, z1.s, z4.s
-; CHECK-NEXT:    sunpklo z4.h, z2.b
+; CHECK-NEXT:    sunpklo z6.s, z6.h
+; CHECK-NEXT:    sunpklo z22.h, z16.b
+; CHECK-NEXT:    ext z16.b, { z16.b, z17.b }, #8
+; CHECK-NEXT:    sunpklo z17.h, z18.b
+; CHECK-NEXT:    sunpklo z24.s, z22.h
+; CHECK-NEXT:    sdivr z1.s, p0/m, z1.s, z6.s
+; CHECK-NEXT:    sunpklo z5.h, z2.b
 ; CHECK-NEXT:    sunpklo z2.s, z3.h
-; CHECK-NEXT:    ext z3.b, z3.b, z0.b, #8
-; CHECK-NEXT:    sunpklo z5.s, z4.h
-; CHECK-NEXT:    ext z4.b, z4.b, z0.b, #8
+; CHECK-NEXT:    ext z3.b, { z3.b, z4.b }, #8
+; CHECK-NEXT:    sunpklo z7.s, z5.h
+; CHECK-NEXT:    ext z4.b, { z5.b, z6.b }, #8
 ; CHECK-NEXT:    sunpklo z3.s, z3.h
 ; CHECK-NEXT:    sunpklo z4.s, z4.h
-; CHECK-NEXT:    sdivr z2.s, p0/m, z2.s, z5.s
-; CHECK-NEXT:    ldr q5, [x0]
-; CHECK-NEXT:    sunpklo z16.h, z5.b
-; CHECK-NEXT:    ext z5.b, z5.b, z0.b, #8
-; CHECK-NEXT:    sunpklo z5.h, z5.b
-; CHECK-NEXT:    sunpklo z18.s, z16.h
-; CHECK-NEXT:    ext z16.b, z16.b, z0.b, #8
-; CHECK-NEXT:    sunpklo z16.s, z16.h
+; CHECK-NEXT:    sdivr z2.s, p0/m, z2.s, z7.s
+; CHECK-NEXT:    sunpklo z7.s, z20.h
+; CHECK-NEXT:    ext z20.b, { z20.b, z21.b }, #8
+; CHECK-NEXT:    ext z21.b, { z22.b, z23.b }, #8
+; CHECK-NEXT:    sunpklo z20.s, z20.h
+; CHECK-NEXT:    sunpklo z21.s, z21.h
+; CHECK-NEXT:    sdivr z20.s, p0/m, z20.s, z21.s
+; CHECK-NEXT:    sunpklo z21.h, z16.b
+; CHECK-NEXT:    sunpklo z16.s, z17.h
+; CHECK-NEXT:    ext z17.b, { z17.b, z18.b }, #8
+; CHECK-NEXT:    ext z18.b, { z21.b, z22.b }, #8
+; CHECK-NEXT:    sunpklo z19.s, z21.h
+; CHECK-NEXT:    sunpklo z17.s, z17.h
+; CHECK-NEXT:    sunpklo z18.s, z18.h
+; CHECK-NEXT:    sdivr z7.s, p0/m, z7.s, z24.s
+; CHECK-NEXT:    sdivr z16.s, p0/m, z16.s, z19.s
+; CHECK-NEXT:    uzp1 z6.h, z7.h, z7.h
+; CHECK-NEXT:    uzp1 z7.h, z20.h, z20.h
 ; CHECK-NEXT:    sdivr z17.s, p0/m, z17.s, z18.s
-; CHECK-NEXT:    sunpklo z18.s, z5.h
-; CHECK-NEXT:    ext z5.b, z5.b, z0.b, #8
-; CHECK-NEXT:    sunpklo z5.s, z5.h
-; CHECK-NEXT:    sdivr z7.s, p0/m, z7.s, z16.s
-; CHECK-NEXT:    sunpklo z16.s, z6.h
-; CHECK-NEXT:    ext z6.b, z6.b, z0.b, #8
-; CHECK-NEXT:    sunpklo z6.s, z6.h
-; CHECK-NEXT:    uzp1 z20.h, z17.h, z17.h
-; CHECK-NEXT:    sdivr z16.s, p0/m, z16.s, z18.s
-; CHECK-NEXT:    uzp1 z18.h, z0.h, z0.h
-; CHECK-NEXT:    uzp1 z19.h, z1.h, z1.h
-; CHECK-NEXT:    uzp1 z21.h, z7.h, z7.h
-; CHECK-NEXT:    sdiv z5.s, p0/m, z5.s, z6.s
-; CHECK-NEXT:    uzp1 z0.h, z16.h, z16.h
+; CHECK-NEXT:    uzp1 z18.h, z2.h, z2.h
 ; CHECK-NEXT:    sdivr z3.s, p0/m, z3.s, z4.s
+; CHECK-NEXT:    uzp1 z4.h, z0.h, z0.h
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z1.h, z5.h, z5.h
-; CHECK-NEXT:    uzp1 z4.h, z2.h, z2.h
-; CHECK-NEXT:    splice z2.h, p0, { z20.h, z21.h }
+; CHECK-NEXT:    uzp1 z5.h, z1.h, z1.h
+; CHECK-NEXT:    uzp1 z0.h, z16.h, z16.h
+; CHECK-NEXT:    splice z2.h, p0, { z6.h, z7.h }
+; CHECK-NEXT:    uzp1 z1.h, z17.h, z17.h
 ; CHECK-NEXT:    splice z0.h, p0, { z0.h, z1.h }
-; CHECK-NEXT:    uzp1 z5.h, z3.h, z3.h
-; CHECK-NEXT:    splice z3.h, p0, { z18.h, z19.h }
-; CHECK-NEXT:    splice z1.h, p0, { z4.h, z5.h }
+; CHECK-NEXT:    uzp1 z19.h, z3.h, z3.h
+; CHECK-NEXT:    splice z3.h, p0, { z4.h, z5.h }
 ; CHECK-NEXT:    uzp1 z4.b, z2.b, z2.b
+; CHECK-NEXT:    uzp1 z5.b, z0.b, z0.b
+; CHECK-NEXT:    splice z1.h, p0, { z18.h, z19.h }
 ; CHECK-NEXT:    ptrue p0.b, vl8
 ; CHECK-NEXT:    uzp1 z2.b, z3.b, z3.b
-; CHECK-NEXT:    uzp1 z5.b, z0.b, z0.b
-; CHECK-NEXT:    uzp1 z3.b, z1.b, z1.b
 ; CHECK-NEXT:    splice z0.b, p0, { z4.b, z5.b }
+; CHECK-NEXT:    uzp1 z3.b, z1.b, z1.b
 ; CHECK-NEXT:    splice z1.b, p0, { z2.b, z3.b }
 ; CHECK-NEXT:    stp q0, q1, [x0]
 ; CHECK-NEXT:    ret
@@ -534,21 +534,21 @@ define <4 x i16> @sdiv_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 define <8 x i16> @sdiv_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; CHECK-LABEL: sdiv_v8i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    sunpklo z2.s, z1.h
-; CHECK-NEXT:    sunpklo z3.s, z0.h
+; CHECK-NEXT:    mov z3.d, z0.d
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1_z2
+; CHECK-NEXT:    sunpklo z0.s, z1.h
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT:    ext z1.b, { z1.b, z2.b }, #8
+; CHECK-NEXT:    ext z2.b, { z3.b, z4.b }, #8
+; CHECK-NEXT:    sunpklo z5.s, z3.h
 ; CHECK-NEXT:    sunpklo z1.s, z1.h
-; CHECK-NEXT:    sunpklo z0.s, z0.h
-; CHECK-NEXT:    sdivr z2.s, p0/m, z2.s, z3.s
-; CHECK-NEXT:    sdiv z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    sunpklo z2.s, z2.h
+; CHECK-NEXT:    sdivr z0.s, p0/m, z0.s, z5.s
+; CHECK-NEXT:    sdivr z1.s, p0/m, z1.s, z2.s
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z1.h, z2.h, z2.h
 ; CHECK-NEXT:    uzp1 z2.h, z0.h, z0.h
-; CHECK-NEXT:    splice z0.h, p0, { z1.h, z2.h }
+; CHECK-NEXT:    uzp1 z3.h, z1.h, z1.h
+; CHECK-NEXT:    splice z0.h, p0, { z2.h, z3.h }
 ; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
@@ -598,33 +598,33 @@ define <8 x i16> @sdiv_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 define void @sdiv_v16i16(ptr %a, ptr %b) {
 ; CHECK-LABEL: sdiv_v16i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldp q4, q1, [x1]
+; CHECK-NEXT:    ldp q16, q2, [x1]
 ; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    ldr q0, [x0, #16]
-; CHECK-NEXT:    sunpklo z2.s, z1.h
-; CHECK-NEXT:    sunpklo z3.s, z0.h
-; CHECK-NEXT:    sunpklo z5.s, z4.h
-; CHECK-NEXT:    ext z4.b, z4.b, z0.b, #8
-; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT:    sdivr z2.s, p0/m, z2.s, z3.s
-; CHECK-NEXT:    ldr q3, [x0]
-; CHECK-NEXT:    sunpklo z4.s, z4.h
-; CHECK-NEXT:    sunpklo z1.s, z1.h
-; CHECK-NEXT:    sunpklo z6.s, z3.h
-; CHECK-NEXT:    ext z3.b, z3.b, z0.b, #8
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    sunpklo z3.s, z3.h
+; CHECK-NEXT:    sunpklo z4.s, z2.h
+; CHECK-NEXT:    sunpklo z5.s, z0.h
+; CHECK-NEXT:    sunpklo z7.s, z16.h
+; CHECK-NEXT:    ext z16.b, { z16.b, z17.b }, #8
+; CHECK-NEXT:    ext z2.b, { z2.b, z3.b }, #8
+; CHECK-NEXT:    ext z0.b, { z0.b, z1.b }, #8
+; CHECK-NEXT:    sdivr z4.s, p0/m, z4.s, z5.s
+; CHECK-NEXT:    ldr q5, [x0]
+; CHECK-NEXT:    sunpklo z1.s, z2.h
 ; CHECK-NEXT:    sunpklo z0.s, z0.h
-; CHECK-NEXT:    sdivr z5.s, p0/m, z5.s, z6.s
-; CHECK-NEXT:    sdiv z3.s, p0/m, z3.s, z4.s
-; CHECK-NEXT:    uzp1 z4.h, z5.h, z5.h
+; CHECK-NEXT:    sunpklo z18.s, z5.h
+; CHECK-NEXT:    ext z5.b, { z5.b, z6.b }, #8
+; CHECK-NEXT:    sunpklo z6.s, z16.h
+; CHECK-NEXT:    sunpklo z5.s, z5.h
+; CHECK-NEXT:    sdivr z7.s, p0/m, z7.s, z18.s
+; CHECK-NEXT:    uzp1 z3.h, z4.h, z4.h
+; CHECK-NEXT:    sdiv z5.s, p0/m, z5.s, z6.s
 ; CHECK-NEXT:    sdiv z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    uzp1 z1.h, z2.h, z2.h
+; CHECK-NEXT:    uzp1 z1.h, z7.h, z7.h
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z5.h, z3.h, z3.h
-; CHECK-NEXT:    uzp1 z2.h, z0.h, z0.h
-; CHECK-NEXT:    splice z0.h, p0, { z4.h, z5.h }
-; CHECK-NEXT:    splice z1.h, p0, { z1.h, z2.h }
+; CHECK-NEXT:    uzp1 z2.h, z5.h, z5.h
+; CHECK-NEXT:    uzp1 z4.h, z0.h, z0.h
+; CHECK-NEXT:    splice z0.h, p0, { z1.h, z2.h }
+; CHECK-NEXT:    splice z1.h, p0, { z3.h, z4.h }
 ; CHECK-NEXT:    stp q0, q1, [x0]
 ; CHECK-NEXT:    ret
 ;
@@ -972,21 +972,21 @@ define <8 x i8> @udiv_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    uunpklo z3.h, z0.b
 ; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    uunpklo z1.h, z1.b
-; CHECK-NEXT:    uunpklo z0.h, z0.b
-; CHECK-NEXT:    uunpklo z2.s, z1.h
-; CHECK-NEXT:    uunpklo z3.s, z0.h
-; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT:    uunpklo z5.s, z3.h
+; CHECK-NEXT:    uunpklo z0.s, z1.h
+; CHECK-NEXT:    ext z1.b, { z1.b, z2.b }, #8
+; CHECK-NEXT:    ext z2.b, { z3.b, z4.b }, #8
 ; CHECK-NEXT:    uunpklo z1.s, z1.h
-; CHECK-NEXT:    uunpklo z0.s, z0.h
-; CHECK-NEXT:    udivr z2.s, p0/m, z2.s, z3.s
-; CHECK-NEXT:    udiv z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    uunpklo z2.s, z2.h
+; CHECK-NEXT:    udivr z0.s, p0/m, z0.s, z5.s
+; CHECK-NEXT:    udivr z1.s, p0/m, z1.s, z2.s
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z1.h, z2.h, z2.h
 ; CHECK-NEXT:    uzp1 z2.h, z0.h, z0.h
-; CHECK-NEXT:    splice z0.h, p0, { z1.h, z2.h }
+; CHECK-NEXT:    uzp1 z3.h, z1.h, z1.h
+; CHECK-NEXT:    splice z0.h, p0, { z2.h, z3.h }
 ; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
@@ -1038,40 +1038,40 @@ define <8 x i8> @udiv_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 define <16 x i8> @udiv_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; CHECK-LABEL: udiv_v16i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    uunpklo z2.h, z1.b
-; CHECK-NEXT:    uunpklo z3.h, z0.b
+; CHECK-NEXT:    mov z3.d, z0.d
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1_z2
+; CHECK-NEXT:    uunpklo z5.h, z1.b
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z4.s, z2.h
-; CHECK-NEXT:    uunpklo z5.s, z3.h
-; CHECK-NEXT:    ext z2.b, z2.b, z0.b, #8
-; CHECK-NEXT:    ext z3.b, z3.b, z0.b, #8
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z1.h, z1.b
-; CHECK-NEXT:    uunpklo z2.s, z2.h
-; CHECK-NEXT:    uunpklo z3.s, z3.h
-; CHECK-NEXT:    uunpklo z0.h, z0.b
-; CHECK-NEXT:    udivr z4.s, p0/m, z4.s, z5.s
-; CHECK-NEXT:    uunpklo z5.s, z0.h
-; CHECK-NEXT:    udivr z2.s, p0/m, z2.s, z3.s
-; CHECK-NEXT:    uunpklo z3.s, z1.h
-; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT:    ext z1.b, { z1.b, z2.b }, #8
+; CHECK-NEXT:    uunpklo z16.h, z3.b
+; CHECK-NEXT:    ext z2.b, { z3.b, z4.b }, #8
+; CHECK-NEXT:    uunpklo z0.s, z5.h
+; CHECK-NEXT:    ext z5.b, { z5.b, z6.b }, #8
+; CHECK-NEXT:    uunpklo z3.h, z1.b
+; CHECK-NEXT:    uunpklo z1.h, z2.b
+; CHECK-NEXT:    ext z6.b, { z16.b, z17.b }, #8
+; CHECK-NEXT:    uunpklo z7.s, z16.h
+; CHECK-NEXT:    uunpklo z5.s, z5.h
+; CHECK-NEXT:    uunpklo z6.s, z6.h
+; CHECK-NEXT:    udivr z0.s, p0/m, z0.s, z7.s
+; CHECK-NEXT:    uunpklo z7.s, z1.h
+; CHECK-NEXT:    ext z1.b, { z1.b, z2.b }, #8
 ; CHECK-NEXT:    uunpklo z1.s, z1.h
-; CHECK-NEXT:    uunpklo z0.s, z0.h
-; CHECK-NEXT:    udivr z3.s, p0/m, z3.s, z5.s
-; CHECK-NEXT:    uzp1 z4.h, z4.h, z4.h
-; CHECK-NEXT:    uzp1 z5.h, z2.h, z2.h
-; CHECK-NEXT:    udiv z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z1.h, z3.h, z3.h
+; CHECK-NEXT:    udivr z5.s, p0/m, z5.s, z6.s
+; CHECK-NEXT:    uunpklo z6.s, z3.h
+; CHECK-NEXT:    ext z3.b, { z3.b, z4.b }, #8
+; CHECK-NEXT:    uunpklo z2.s, z3.h
+; CHECK-NEXT:    udivr z6.s, p0/m, z6.s, z7.s
+; CHECK-NEXT:    udiv z1.s, p0/m, z1.s, z2.s
 ; CHECK-NEXT:    uzp1 z2.h, z0.h, z0.h
-; CHECK-NEXT:    splice z0.h, p0, { z4.h, z5.h }
-; CHECK-NEXT:    splice z1.h, p0, { z1.h, z2.h }
-; CHECK-NEXT:    ptrue p0.b, vl8
+; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    uzp1 z3.h, z5.h, z5.h
+; CHECK-NEXT:    uzp1 z4.h, z6.h, z6.h
+; CHECK-NEXT:    splice z0.h, p0, { z2.h, z3.h }
 ; CHECK-NEXT:    uzp1 z2.b, z0.b, z0.b
+; CHECK-NEXT:    uzp1 z5.h, z1.h, z1.h
+; CHECK-NEXT:    splice z1.h, p0, { z4.h, z5.h }
+; CHECK-NEXT:    ptrue p0.b, vl8
 ; CHECK-NEXT:    uzp1 z3.b, z1.b, z1.b
 ; CHECK-NEXT:    splice z0.b, p0, { z2.b, z3.b }
 ; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
@@ -1155,73 +1155,73 @@ define <16 x i8> @udiv_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 define void @udiv_v32i8(ptr %a, ptr %b) {
 ; CHECK-LABEL: udiv_v32i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldp q6, q3, [x1]
+; CHECK-NEXT:    ldp q18, q4, [x1]
 ; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    ldr q2, [x0, #16]
-; CHECK-NEXT:    uunpklo z1.h, z3.b
-; CHECK-NEXT:    uunpklo z4.h, z2.b
-; CHECK-NEXT:    uunpklo z7.h, z6.b
-; CHECK-NEXT:    uunpklo z0.s, z1.h
-; CHECK-NEXT:    uunpklo z5.s, z4.h
-; CHECK-NEXT:    uunpklo z17.s, z7.h
-; CHECK-NEXT:    udivr z0.s, p0/m, z0.s, z5.s
-; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT:    ext z4.b, z4.b, z0.b, #8
-; CHECK-NEXT:    ext z3.b, z3.b, z0.b, #8
-; CHECK-NEXT:    ext z2.b, z2.b, z0.b, #8
-; CHECK-NEXT:    ext z7.b, z7.b, z0.b, #8
-; CHECK-NEXT:    ext z6.b, z6.b, z0.b, #8
+; CHECK-NEXT:    uunpklo z6.h, z4.b
+; CHECK-NEXT:    uunpklo z16.h, z2.b
+; CHECK-NEXT:    ext z4.b, { z4.b, z5.b }, #8
+; CHECK-NEXT:    ext z2.b, { z2.b, z3.b }, #8
+; CHECK-NEXT:    uunpklo z20.h, z18.b
+; CHECK-NEXT:    ext z18.b, { z18.b, z19.b }, #8
+; CHECK-NEXT:    uunpklo z3.h, z4.b
+; CHECK-NEXT:    uunpklo z0.s, z6.h
+; CHECK-NEXT:    uunpklo z1.s, z16.h
+; CHECK-NEXT:    udivr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ext z1.b, { z6.b, z7.b }, #8
+; CHECK-NEXT:    ext z6.b, { z16.b, z17.b }, #8
+; CHECK-NEXT:    ldr q16, [x0]
 ; CHECK-NEXT:    uunpklo z1.s, z1.h
-; CHECK-NEXT:    uunpklo z4.s, z4.h
-; CHECK-NEXT:    uunpklo z3.h, z3.b
-; CHECK-NEXT:    uunpklo z7.s, z7.h
-; CHECK-NEXT:    uunpklo z6.h, z6.b
-; CHECK-NEXT:    udivr z1.s, p0/m, z1.s, z4.s
-; CHECK-NEXT:    uunpklo z4.h, z2.b
+; CHECK-NEXT:    uunpklo z6.s, z6.h
+; CHECK-NEXT:    uunpklo z22.h, z16.b
+; CHECK-NEXT:    ext z16.b, { z16.b, z17.b }, #8
+; CHECK-NEXT:    uunpklo z17.h, z18.b
+; CHECK-NEXT:    uunpklo z24.s, z22.h
+; CHECK-NEXT:    udivr z1.s, p0/m, z1.s, z6.s
+; CHECK-NEXT:    uunpklo z5.h, z2.b
 ; CHECK-NEXT:    uunpklo z2.s, z3.h
-; CHECK-NEXT:    ext z3.b, z3.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z5.s, z4.h
-; CHECK-NEXT:    ext z4.b, z4.b, z0.b, #8
+; CHECK-NEXT:    ext z3.b, { z3.b, z4.b }, #8
+; CHECK-NEXT:    uunpklo z7.s, z5.h
+; CHECK-NEXT:    ext z4.b, { z5.b, z6.b }, #8
 ; CHECK-NEXT:    uunpklo z3.s, z3.h
 ; CHECK-NEXT:    uunpklo z4.s, z4.h
-; CHECK-NEXT:    udivr z2.s, p0/m, z2.s, z5.s
-; CHECK-NEXT:    ldr q5, [x0]
-; CHECK-NEXT:    uunpklo z16.h, z5.b
-; CHECK-NEXT:    ext z5.b, z5.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z5.h, z5.b
-; CHECK-NEXT:    uunpklo z18.s, z16.h
-; CHECK-NEXT:    ext z16.b, z16.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z16.s, z16.h
+; CHECK-NEXT:    udivr z2.s, p0/m, z2.s, z7.s
+; CHECK-NEXT:    uunpklo z7.s, z20.h
+; CHECK-NEXT:    ext z20.b, { z20.b, z21.b }, #8
+; CHECK-NEXT:    ext z21.b, { z22.b, z23.b }, #8
+; CHECK-NEXT:    uunpklo z20.s, z20.h
+; CHECK-NEXT:    uunpklo z21.s, z21.h
+; CHECK-NEXT:    udivr z20.s, p0/m, z20.s, z21.s
+; CHECK-NEXT:    uunpklo z21.h, z16.b
+; CHECK-NEXT:    uunpklo z16.s, z17.h
+; CHECK-NEXT:    ext z17.b, { z17.b, z18.b }, #8
+; CHECK-NEXT:    ext z18.b, { z21.b, z22.b }, #8
+; CHECK-NEXT:    uunpklo z19.s, z21.h
+; CHECK-NEXT:    uunpklo z17.s, z17.h
+; CHECK-NEXT:    uunpklo z18.s, z18.h
+; CHECK-NEXT:    udivr z7.s, p0/m, z7.s, z24.s
+; CHECK-NEXT:    udivr z16.s, p0/m, z16.s, z19.s
+; CHECK-NEXT:    uzp1 z6.h, z7.h, z7.h
+; CHECK-NEXT:    uzp1 z7.h, z20.h, z20.h
 ; CHECK-NEXT:    udivr z17.s, p0/m, z17.s, z18.s
-; CHECK-NEXT:    uunpklo z18.s, z5.h
-; CHECK-NEXT:    ext z5.b, z5.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z5.s, z5.h
-; CHECK-NEXT:    udivr z7.s, p0/m, z7.s, z16.s
-; CHECK-NEXT:    uunpklo z16.s, z6.h
-; CHECK-NEXT:    ext z6.b, z6.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z6.s, z6.h
-; CHECK-NEXT:    uzp1 z20.h, z17.h, z17.h
-; CHECK-NEXT:    udivr z16.s, p0/m, z16.s, z18.s
-; CHECK-NEXT:    uzp1 z18.h, z0.h, z0.h
-; CHECK-NEXT:    uzp1 z19.h, z1.h, z1.h
-; CHECK-NEXT:    uzp1 z21.h, z7.h, z7.h
-; CHECK-NEXT:    udiv z5.s, p0/m, z5.s, z6.s
-; CHECK-NEXT:    uzp1 z0.h, z16.h, z16.h
+; CHECK-NEXT:    uzp1 z18.h, z2.h, z2.h
 ; CHECK-NEXT:    udivr z3.s, p0/m, z3.s, z4.s
+; CHECK-NEXT:    uzp1 z4.h, z0.h, z0.h
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z1.h, z5.h, z5.h
-; CHECK-NEXT:    uzp1 z4.h, z2.h, z2.h
-; CHECK-NEXT:    splice z2.h, p0, { z20.h, z21.h }
+; CHECK-NEXT:    uzp1 z5.h, z1.h, z1.h
+; CHECK-NEXT:    uzp1 z0.h, z16.h, z16.h
+; CHECK-NEXT:    splice z2.h, p0, { z6.h, z7.h }
+; CHECK-NEXT:    uzp1 z1.h, z17.h, z17.h
 ; CHECK-NEXT:    splice z0.h, p0, { z0.h, z1.h }
-; CHECK-NEXT:    uzp1 z5.h, z3.h, z3.h
-; CHECK-NEXT:    splice z3.h, p0, { z18.h, z19.h }
-; CHECK-NEXT:    splice z1.h, p0, { z4.h, z5.h }
+; CHECK-NEXT:    uzp1 z19.h, z3.h, z3.h
+; CHECK-NEXT:    splice z3.h, p0, { z4.h, z5.h }
 ; CHECK-NEXT:    uzp1 z4.b, z2.b, z2.b
+; CHECK-NEXT:    uzp1 z5.b, z0.b, z0.b
+; CHECK-NEXT:    splice z1.h, p0, { z18.h, z19.h }
 ; CHECK-NEXT:    ptrue p0.b, vl8
 ; CHECK-NEXT:    uzp1 z2.b, z3.b, z3.b
-; CHECK-NEXT:    uzp1 z5.b, z0.b, z0.b
-; CHECK-NEXT:    uzp1 z3.b, z1.b, z1.b
 ; CHECK-NEXT:    splice z0.b, p0, { z4.b, z5.b }
+; CHECK-NEXT:    uzp1 z3.b, z1.b, z1.b
 ; CHECK-NEXT:    splice z1.b, p0, { z2.b, z3.b }
 ; CHECK-NEXT:    stp q0, q1, [x0]
 ; CHECK-NEXT:    ret
@@ -1448,21 +1448,21 @@ define <4 x i16> @udiv_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 define <8 x i16> @udiv_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; CHECK-LABEL: udiv_v8i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    uunpklo z2.s, z1.h
-; CHECK-NEXT:    uunpklo z3.s, z0.h
+; CHECK-NEXT:    mov z3.d, z0.d
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1_z2
+; CHECK-NEXT:    uunpklo z0.s, z1.h
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT:    ext z1.b, { z1.b, z2.b }, #8
+; CHECK-NEXT:    ext z2.b, { z3.b, z4.b }, #8
+; CHECK-NEXT:    uunpklo z5.s, z3.h
 ; CHECK-NEXT:    uunpklo z1.s, z1.h
-; CHECK-NEXT:    uunpklo z0.s, z0.h
-; CHECK-NEXT:    udivr z2.s, p0/m, z2.s, z3.s
-; CHECK-NEXT:    udiv z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    uunpklo z2.s, z2.h
+; CHECK-NEXT:    udivr z0.s, p0/m, z0.s, z5.s
+; CHECK-NEXT:    udivr z1.s, p0/m, z1.s, z2.s
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z1.h, z2.h, z2.h
 ; CHECK-NEXT:    uzp1 z2.h, z0.h, z0.h
-; CHECK-NEXT:    splice z0.h, p0, { z1.h, z2.h }
+; CHECK-NEXT:    uzp1 z3.h, z1.h, z1.h
+; CHECK-NEXT:    splice z0.h, p0, { z2.h, z3.h }
 ; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
@@ -1512,33 +1512,33 @@ define <8 x i16> @udiv_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 define void @udiv_v16i16(ptr %a, ptr %b) {
 ; CHECK-LABEL: udiv_v16i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldp q4, q1, [x1]
+; CHECK-NEXT:    ldp q16, q2, [x1]
 ; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    ldr q0, [x0, #16]
-; CHECK-NEXT:    uunpklo z2.s, z1.h
-; CHECK-NEXT:    uunpklo z3.s, z0.h
-; CHECK-NEXT:    uunpklo z5.s, z4.h
-; CHECK-NEXT:    ext z4.b, z4.b, z0.b, #8
-; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT:    udivr z2.s, p0/m, z2.s, z3.s
-; CHECK-NEXT:    ldr q3, [x0]
-; CHECK-NEXT:    uunpklo z4.s, z4.h
-; CHECK-NEXT:    uunpklo z1.s, z1.h
-; CHECK-NEXT:    uunpklo z6.s, z3.h
-; CHECK-NEXT:    ext z3.b, z3.b, z0.b, #8
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z3.s, z3.h
+; CHECK-NEXT:    uunpklo z4.s, z2.h
+; CHECK-NEXT:    uunpklo z5.s, z0.h
+; CHECK-NEXT:    uunpklo z7.s, z16.h
+; CHECK-NEXT:    ext z16.b, { z16.b, z17.b }, #8
+; CHECK-NEXT:    ext z2.b, { z2.b, z3.b }, #8
+; CHECK-NEXT:    ext z0.b, { z0.b, z1.b }, #8
+; CHECK-NEXT:    udivr z4.s, p0/m, z4.s, z5.s
+; CHECK-NEXT:    ldr q5, [x0]
+; CHECK-NEXT:    uunpklo z1.s, z2.h
 ; CHECK-NEXT:    uunpklo z0.s, z0.h
-; CHECK-NEXT:    udivr z5.s, p0/m, z5.s, z6.s
-; CHECK-NEXT:    udiv z3.s, p0/m, z3.s, z4.s
-; CHECK-NEXT:    uzp1 z4.h, z5.h, z5.h
+; CHECK-NEXT:    uunpklo z18.s, z5.h
+; CHECK-NEXT:    ext z5.b, { z5.b, z6.b }, #8
+; CHECK-NEXT:    uunpklo z6.s, z16.h
+; CHECK-NEXT:    uunpklo z5.s, z5.h
+; CHECK-NEXT:    udivr z7.s, p0/m, z7.s, z18.s
+; CHECK-NEXT:    uzp1 z3.h, z4.h, z4.h
+; CHECK-NEXT:    udiv z5.s, p0/m, z5.s, z6.s
 ; CHECK-NEXT:    udiv z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    uzp1 z1.h, z2.h, z2.h
+; CHECK-NEXT:    uzp1 z1.h, z7.h, z7.h
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z5.h, z3.h, z3.h
-; CHECK-NEXT:    uzp1 z2.h, z0.h, z0.h
-; CHECK-NEXT:    splice z0.h, p0, { z4.h, z5.h }
-; CHECK-NEXT:    splice z1.h, p0, { z1.h, z2.h }
+; CHECK-NEXT:    uzp1 z2.h, z5.h, z5.h
+; CHECK-NEXT:    uzp1 z4.h, z0.h, z0.h
+; CHECK-NEXT:    splice z0.h, p0, { z1.h, z2.h }
+; CHECK-NEXT:    splice z1.h, p0, { z3.h, z4.h }
 ; CHECK-NEXT:    stp q0, q1, [x0]
 ; CHECK-NEXT:    ret
 ;
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll
index b022c19363ed6..eb8d61246344b 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll
@@ -14,19 +14,33 @@ target triple = "aarch64-unknown-linux-gnu"
 ; type's element type is not byte based and thus cannot be lowered directly to
 ; an SVE instruction.
 define void @sext_v8i1_v8i32(<8 x i1> %a, ptr %out) {
-; CHECK-LABEL: sext_v8i1_v8i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    uunpklo z0.h, z0.b
-; CHECK-NEXT:    uunpklo z1.s, z0.h
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z0.s, z0.h
-; CHECK-NEXT:    lsl z1.s, z1.s, #31
-; CHECK-NEXT:    lsl z0.s, z0.s, #31
-; CHECK-NEXT:    asr z1.s, z1.s, #31
-; CHECK-NEXT:    asr z0.s, z0.s, #31
-; CHECK-NEXT:    stp q1, q0, [x0]
-; CHECK-NEXT:    ret
+; SVE-LABEL: sext_v8i1_v8i32:
+; SVE:       // %bb.0:
+; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
+; SVE-NEXT:    uunpklo z0.h, z0.b
+; SVE-NEXT:    uunpklo z1.s, z0.h
+; SVE-NEXT:    ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT:    uunpklo z0.s, z0.h
+; SVE-NEXT:    lsl z1.s, z1.s, #31
+; SVE-NEXT:    lsl z0.s, z0.s, #31
+; SVE-NEXT:    asr z1.s, z1.s, #31
+; SVE-NEXT:    asr z0.s, z0.s, #31
+; SVE-NEXT:    stp q1, q0, [x0]
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: sext_v8i1_v8i32:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
+; SVE2-NEXT:    uunpklo z0.h, z0.b
+; SVE2-NEXT:    ext z2.b, { z0.b, z1.b }, #8
+; SVE2-NEXT:    uunpklo z0.s, z0.h
+; SVE2-NEXT:    uunpklo z1.s, z2.h
+; SVE2-NEXT:    lsl z0.s, z0.s, #31
+; SVE2-NEXT:    lsl z1.s, z1.s, #31
+; SVE2-NEXT:    asr z0.s, z0.s, #31
+; SVE2-NEXT:    asr z1.s, z1.s, #31
+; SVE2-NEXT:    stp q0, q1, [x0]
+; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sext_v8i1_v8i32:
 ; NONEON-NOSVE:       // %bb.0:
@@ -70,19 +84,33 @@ define void @sext_v8i1_v8i32(<8 x i1> %a, ptr %out) {
 ; type's element type is not power-of-2 based and thus cannot be lowered
 ; directly to an SVE instruction.
 define void @sext_v4i3_v4i64(<4 x i3> %a, ptr %out) {
-; CHECK-LABEL: sext_v4i3_v4i64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    uunpklo z0.s, z0.h
-; CHECK-NEXT:    uunpklo z1.d, z0.s
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z0.d, z0.s
-; CHECK-NEXT:    lsl z1.d, z1.d, #61
-; CHECK-NEXT:    lsl z0.d, z0.d, #61
-; CHECK-NEXT:    asr z1.d, z1.d, #61
-; CHECK-NEXT:    asr z0.d, z0.d, #61
-; CHECK-NEXT:    stp q1, q0, [x0]
-; CHECK-NEXT:    ret
+; SVE-LABEL: sext_v4i3_v4i64:
+; SVE:       // %bb.0:
+; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
+; SVE-NEXT:    uunpklo z0.s, z0.h
+; SVE-NEXT:    uunpklo z1.d, z0.s
+; SVE-NEXT:    ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT:    uunpklo z0.d, z0.s
+; SVE-NEXT:    lsl z1.d, z1.d, #61
+; SVE-NEXT:    lsl z0.d, z0.d, #61
+; SVE-NEXT:    asr z1.d, z1.d, #61
+; SVE-NEXT:    asr z0.d, z0.d, #61
+; SVE-NEXT:    stp q1, q0, [x0]
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: sext_v4i3_v4i64:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
+; SVE2-NEXT:    uunpklo z0.s, z0.h
+; SVE2-NEXT:    ext z2.b, { z0.b, z1.b }, #8
+; SVE2-NEXT:    uunpklo z0.d, z0.s
+; SVE2-NEXT:    uunpklo z1.d, z2.s
+; SVE2-NEXT:    lsl z0.d, z0.d, #61
+; SVE2-NEXT:    lsl z1.d, z1.d, #61
+; SVE2-NEXT:    asr z0.d, z0.d, #61
+; SVE2-NEXT:    asr z1.d, z1.d, #61
+; SVE2-NEXT:    stp q0, q1, [x0]
+; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sext_v4i3_v4i64:
 ; NONEON-NOSVE:       // %bb.0:
@@ -113,14 +141,23 @@ define void @sext_v4i3_v4i64(<4 x i3> %a, ptr %out) {
 ;
 
 define void @sext_v16i8_v16i16(<16 x i8> %a, ptr %out) {
-; CHECK-LABEL: sext_v16i8_v16i16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    sunpklo z1.h, z0.b
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    sunpklo z0.h, z0.b
-; CHECK-NEXT:    stp q1, q0, [x0]
-; CHECK-NEXT:    ret
+; SVE-LABEL: sext_v16i8_v16i16:
+; SVE:       // %bb.0:
+; SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
+; SVE-NEXT:    sunpklo z1.h, z0.b
+; SVE-NEXT:    ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT:    sunpklo z0.h, z0.b
+; SVE-NEXT:    stp q1, q0, [x0]
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: sext_v16i8_v16i16:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0_z1
+; SVE2-NEXT:    ext z2.b, { z0.b, z1.b }, #8
+; SVE2-NEXT:    sunpklo z0.h, z0.b
+; SVE2-NEXT:    sunpklo z1.h, z2.b
+; SVE2-NEXT:    stp q0, q1, [x0]
+; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sext_v16i8_v16i16:
 ; NONEON-NOSVE:       // %bb.0:
@@ -171,20 +208,35 @@ define void @sext_v16i8_v16i16(<16 x i8> %a, ptr %out) {
 
 ; NOTE: Extra 'add' is to prevent the extend being combined with the load.
 define void @sext_v32i8_v32i16(ptr %in, ptr %out) {
-; CHECK-LABEL: sext_v32i8_v32i16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldp q1, q0, [x0]
-; CHECK-NEXT:    add z0.b, z0.b, z0.b
-; CHECK-NEXT:    add z1.b, z1.b, z1.b
-; CHECK-NEXT:    sunpklo z2.h, z0.b
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    sunpklo z3.h, z1.b
-; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT:    sunpklo z0.h, z0.b
-; CHECK-NEXT:    sunpklo z1.h, z1.b
-; CHECK-NEXT:    stp q2, q0, [x1, #32]
-; CHECK-NEXT:    stp q3, q1, [x1]
-; CHECK-NEXT:    ret
+; SVE-LABEL: sext_v32i8_v32i16:
+; SVE:       // %bb.0:
+; SVE-NEXT:    ldp q1, q0, [x0]
+; SVE-NEXT:    add z0.b, z0.b, z0.b
+; SVE-NEXT:    add z1.b, z1.b, z1.b
+; SVE-NEXT:    sunpklo z2.h, z0.b
+; SVE-NEXT:    ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT:    sunpklo z3.h, z1.b
+; SVE-NEXT:    ext z1.b, z1.b, z0.b, #8
+; SVE-NEXT:    sunpklo z0.h, z0.b
+; SVE-NEXT:    sunpklo z1.h, z1.b
+; SVE-NEXT:    stp q2, q0, [x1, #32]
+; SVE-NEXT:    stp q3, q1, [x1]
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: sext_v32i8_v32i16:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    ldp q1, q0, [x0]
+; SVE2-NEXT:    add z2.b, z0.b, z0.b
+; SVE2-NEXT:    add z0.b, z1.b, z1.b
+; SVE2-NEXT:    ext z4.b, { z2.b, z3.b }, #8
+; SVE2-NEXT:    ext z5.b, { z0.b, z1.b }, #8
+; SVE2-NEXT:    sunpklo z2.h, z2.b
+; SVE2-NEXT:    sunpklo z0.h, z0.b
+; SVE2-NEXT:    sunpklo z3.h, z4.b
+; SVE2-NEXT:    sunpklo z1.h, z5.b
+; SVE2-NEXT:    stp q0, q1, [x1]
+; SVE2-NEXT:    stp q2, q3, [x1, #32]
+; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sext_v32i8_v32i16:
 ; NONEON-NOSVE:       // %bb.0:
@@ -365,15 +417,25 @@ define void @sext_v32i8_v32i16(ptr %in, ptr %out) {
 ;
 
 define void @sext_v8i8_v8i32(<8 x i8> %a, ptr %out) {
-; CHECK-LABEL: sext_v8i8_v8i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    sunpklo z0.h, z0.b
-; CHECK-NEXT:    sunpklo z1.s, z0.h
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    sunpklo z0.s, z0.h
-; CHECK-NEXT:    stp q1, q0, [x0]
-; CHECK-NEXT:    ret
+; SVE-LABEL: sext_v8i8_v8i32:
+; SVE:       // %bb.0:
+; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
+; SVE-NEXT:    sunpklo z0.h, z0.b
+; SVE-NEXT:    sunpklo z1.s, z0.h
+; SVE-NEXT:    ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT:    sunpklo z0.s, z0.h
+; SVE-NEXT:    stp q1, q0, [x0]
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: sext_v8i8_v8i32:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
+; SVE2-NEXT:    sunpklo z0.h, z0.b
+; SVE2-NEXT:    ext z2.b, { z0.b, z1.b }, #8
+; SVE2-NEXT:    sunpklo z0.s, z0.h
+; SVE2-NEXT:    sunpklo z1.s, z2.h
+; SVE2-NEXT:    stp q0, q1, [x0]
+; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sext_v8i8_v8i32:
 ; NONEON-NOSVE:       // %bb.0:
@@ -402,21 +464,37 @@ define void @sext_v8i8_v8i32(<8 x i8> %a, ptr %out) {
 }
 
 define void @sext_v16i8_v16i32(<16 x i8> %a, ptr %out) {
-; CHECK-LABEL: sext_v16i8_v16i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    sunpklo z1.h, z0.b
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    sunpklo z0.h, z0.b
-; CHECK-NEXT:    sunpklo z2.s, z1.h
-; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT:    sunpklo z3.s, z0.h
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    sunpklo z1.s, z1.h
-; CHECK-NEXT:    sunpklo z0.s, z0.h
-; CHECK-NEXT:    stp q2, q1, [x0]
-; CHECK-NEXT:    stp q3, q0, [x0, #32]
-; CHECK-NEXT:    ret
+; SVE-LABEL: sext_v16i8_v16i32:
+; SVE:       // %bb.0:
+; SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
+; SVE-NEXT:    sunpklo z1.h, z0.b
+; SVE-NEXT:    ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT:    sunpklo z0.h, z0.b
+; SVE-NEXT:    sunpklo z2.s, z1.h
+; SVE-NEXT:    ext z1.b, z1.b, z0.b, #8
+; SVE-NEXT:    sunpklo z3.s, z0.h
+; SVE-NEXT:    ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT:    sunpklo z1.s, z1.h
+; SVE-NEXT:    sunpklo z0.s, z0.h
+; SVE-NEXT:    stp q2, q1, [x0]
+; SVE-NEXT:    stp q3, q0, [x0, #32]
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: sext_v16i8_v16i32:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0_z1
+; SVE2-NEXT:    ext z2.b, { z0.b, z1.b }, #8
+; SVE2-NEXT:    sunpklo z0.h, z0.b
+; SVE2-NEXT:    sunpklo z2.h, z2.b
+; SVE2-NEXT:    ext z4.b, { z0.b, z1.b }, #8
+; SVE2-NEXT:    sunpklo z0.s, z0.h
+; SVE2-NEXT:    ext z5.b, { z2.b, z3.b }, #8
+; SVE2-NEXT:    sunpklo z2.s, z2.h
+; SVE2-NEXT:    sunpklo z1.s, z4.h
+; SVE2-NEXT:    sunpklo z3.s, z5.h
+; SVE2-NEXT:    stp q0, q1, [x0]
+; SVE2-NEXT:    stp q2, q3, [x0, #32]
+; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sext_v16i8_v16i32:
 ; NONEON-NOSVE:       // %bb.0:
@@ -460,34 +538,63 @@ define void @sext_v16i8_v16i32(<16 x i8> %a, ptr %out) {
 }
 
 define void @sext_v32i8_v32i32(ptr %in, ptr %out) {
-; CHECK-LABEL: sext_v32i8_v32i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldp q1, q0, [x0]
-; CHECK-NEXT:    add z0.b, z0.b, z0.b
-; CHECK-NEXT:    add z1.b, z1.b, z1.b
-; CHECK-NEXT:    sunpklo z2.h, z0.b
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    sunpklo z3.h, z1.b
-; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT:    sunpklo z0.h, z0.b
-; CHECK-NEXT:    sunpklo z4.s, z2.h
-; CHECK-NEXT:    sunpklo z5.s, z3.h
-; CHECK-NEXT:    sunpklo z1.h, z1.b
-; CHECK-NEXT:    ext z2.b, z2.b, z0.b, #8
-; CHECK-NEXT:    ext z3.b, z3.b, z0.b, #8
-; CHECK-NEXT:    sunpklo z6.s, z0.h
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    sunpklo z7.s, z1.h
-; CHECK-NEXT:    sunpklo z2.s, z2.h
-; CHECK-NEXT:    sunpklo z3.s, z3.h
-; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT:    sunpklo z0.s, z0.h
-; CHECK-NEXT:    sunpklo z1.s, z1.h
-; CHECK-NEXT:    stp q5, q3, [x1]
-; CHECK-NEXT:    stp q4, q2, [x1, #64]
-; CHECK-NEXT:    stp q6, q0, [x1, #96]
-; CHECK-NEXT:    stp q7, q1, [x1, #32]
-; CHECK-NEXT:    ret
+; SVE-LABEL: sext_v32i8_v32i32:
+; SVE:       // %bb.0:
+; SVE-NEXT:    ldp q1, q0, [x0]
+; SVE-NEXT:    add z0.b, z0.b, z0.b
+; SVE-NEXT:    add z1.b, z1.b, z1.b
+; SVE-NEXT:    sunpklo z2.h, z0.b
+; SVE-NEXT:    ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT:    sunpklo z3.h, z1.b
+; SVE-NEXT:    ext z1.b, z1.b, z0.b, #8
+; SVE-NEXT:    sunpklo z0.h, z0.b
+; SVE-NEXT:    sunpklo z4.s, z2.h
+; SVE-NEXT:    sunpklo z5.s, z3.h
+; SVE-NEXT:    sunpklo z1.h, z1.b
+; SVE-NEXT:    ext z2.b, z2.b, z0.b, #8
+; SVE-NEXT:    ext z3.b, z3.b, z0.b, #8
+; SVE-NEXT:    sunpklo z6.s, z0.h
+; SVE-NEXT:    ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT:    sunpklo z7.s, z1.h
+; SVE-NEXT:    sunpklo z2.s, z2.h
+; SVE-NEXT:    sunpklo z3.s, z3.h
+; SVE-NEXT:    ext z1.b, z1.b, z0.b, #8
+; SVE-NEXT:    sunpklo z0.s, z0.h
+; SVE-NEXT:    sunpklo z1.s, z1.h
+; SVE-NEXT:    stp q5, q3, [x1]
+; SVE-NEXT:    stp q4, q2, [x1, #64]
+; SVE-NEXT:    stp q6, q0, [x1, #96]
+; SVE-NEXT:    stp q7, q1, [x1, #32]
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: sext_v32i8_v32i32:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    ldp q1, q0, [x0]
+; SVE2-NEXT:    add z2.b, z0.b, z0.b
+; SVE2-NEXT:    add z0.b, z1.b, z1.b
+; SVE2-NEXT:    ext z4.b, { z2.b, z3.b }, #8
+; SVE2-NEXT:    sunpklo z2.h, z2.b
+; SVE2-NEXT:    sunpklo z5.h, z0.b
+; SVE2-NEXT:    ext z0.b, { z0.b, z1.b }, #8
+; SVE2-NEXT:    sunpklo z16.h, z4.b
+; SVE2-NEXT:    ext z1.b, { z2.b, z3.b }, #8
+; SVE2-NEXT:    sunpklo z18.h, z0.b
+; SVE2-NEXT:    ext z0.b, { z5.b, z6.b }, #8
+; SVE2-NEXT:    sunpklo z2.s, z2.h
+; SVE2-NEXT:    sunpklo z3.s, z5.h
+; SVE2-NEXT:    sunpklo z1.s, z1.h
+; SVE2-NEXT:    sunpklo z0.s, z0.h
+; SVE2-NEXT:    ext z4.b, { z16.b, z17.b }, #8
+; SVE2-NEXT:    ext z5.b, { z18.b, z19.b }, #8
+; SVE2-NEXT:    sunpklo z6.s, z16.h
+; SVE2-NEXT:    stp q3, q0, [x1]
+; SVE2-NEXT:    sunpklo z3.s, z18.h
+; SVE2-NEXT:    stp q2, q1, [x1, #64]
+; SVE2-NEXT:    sunpklo z2.s, z4.h
+; SVE2-NEXT:    sunpklo z1.s, z5.h
+; SVE2-NEXT:    stp q3, q1, [x1, #32]
+; SVE2-NEXT:    stp q6, q2, [x1, #96]
+; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sext_v32i8_v32i32:
 ; NONEON-NOSVE:       // %bb.0:
@@ -659,18 +766,31 @@ define void @sext_v32i8_v32i32(ptr %in, ptr %out) {
 ; extend is a two step process where the container is any_extend'd with the
 ; result feeding an inreg sign extend.
 define void @sext_v4i8_v4i64(<4 x i8> %a, ptr %out) {
-; CHECK-LABEL: sext_v4i8_v4i64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    uunpklo z0.s, z0.h
-; CHECK-NEXT:    uunpklo z1.d, z0.s
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z0.d, z0.s
-; CHECK-NEXT:    sxtb z1.d, p0/m, z1.d
-; CHECK-NEXT:    sxtb z0.d, p0/m, z0.d
-; CHECK-NEXT:    stp q1, q0, [x0]
-; CHECK-NEXT:    ret
+; SVE-LABEL: sext_v4i8_v4i64:
+; SVE:       // %bb.0:
+; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
+; SVE-NEXT:    ptrue p0.d, vl2
+; SVE-NEXT:    uunpklo z0.s, z0.h
+; SVE-NEXT:    uunpklo z1.d, z0.s
+; SVE-NEXT:    ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT:    uunpklo z0.d, z0.s
+; SVE-NEXT:    sxtb z1.d, p0/m, z1.d
+; SVE-NEXT:    sxtb z0.d, p0/m, z0.d
+; SVE-NEXT:    stp q1, q0, [x0]
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: sext_v4i8_v4i64:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
+; SVE2-NEXT:    ptrue p0.d, vl2
+; SVE2-NEXT:    uunpklo z0.s, z0.h
+; SVE2-NEXT:    ext z2.b, { z0.b, z1.b }, #8
+; SVE2-NEXT:    uunpklo z0.d, z0.s
+; SVE2-NEXT:    uunpklo z1.d, z2.s
+; SVE2-NEXT:    sxtb z0.d, p0/m, z0.d
+; SVE2-NEXT:    sxtb z1.d, p0/m, z1.d
+; SVE2-NEXT:    stp q0, q1, [x0]
+; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sext_v4i8_v4i64:
 ; NONEON-NOSVE:       // %bb.0:
@@ -695,22 +815,39 @@ define void @sext_v4i8_v4i64(<4 x i8> %a, ptr %out) {
 }
 
 define void @sext_v8i8_v8i64(<8 x i8> %a, ptr %out) {
-; CHECK-LABEL: sext_v8i8_v8i64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    sunpklo z0.h, z0.b
-; CHECK-NEXT:    sunpklo z1.s, z0.h
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    sunpklo z0.s, z0.h
-; CHECK-NEXT:    sunpklo z2.d, z1.s
-; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT:    sunpklo z3.d, z0.s
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    sunpklo z1.d, z1.s
-; CHECK-NEXT:    sunpklo z0.d, z0.s
-; CHECK-NEXT:    stp q2, q1, [x0]
-; CHECK-NEXT:    stp q3, q0, [x0, #32]
-; CHECK-NEXT:    ret
+; SVE-LABEL: sext_v8i8_v8i64:
+; SVE:       // %bb.0:
+; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
+; SVE-NEXT:    sunpklo z0.h, z0.b
+; SVE-NEXT:    sunpklo z1.s, z0.h
+; SVE-NEXT:    ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT:    sunpklo z0.s, z0.h
+; SVE-NEXT:    sunpklo z2.d, z1.s
+; SVE-NEXT:    ext z1.b, z1.b, z0.b, #8
+; SVE-NEXT:    sunpklo z3.d, z0.s
+; SVE-NEXT:    ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT:    sunpklo z1.d, z1.s
+; SVE-NEXT:    sunpklo z0.d, z0.s
+; SVE-NEXT:    stp q2, q1, [x0]
+; SVE-NEXT:    stp q3, q0, [x0, #32]
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: sext_v8i8_v8i64:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
+; SVE2-NEXT:    sunpklo z0.h, z0.b
+; SVE2-NEXT:    ext z2.b, { z0.b, z1.b }, #8
+; SVE2-NEXT:    sunpklo z0.s, z0.h
+; SVE2-NEXT:    sunpklo z2.s, z2.h
+; SVE2-NEXT:    ext z4.b, { z0.b, z1.b }, #8
+; SVE2-NEXT:    sunpklo z0.d, z0.s
+; SVE2-NEXT:    ext z5.b, { z2.b, z3.b }, #8
+; SVE2-NEXT:    sunpklo z1.d, z4.s
+; SVE2-NEXT:    sunpklo z2.d, z2.s
+; SVE2-NEXT:    sunpklo z3.d, z5.s
+; SVE2-NEXT:    stp q0, q1, [x0]
+; SVE2-NEXT:    stp q2, q3, [x0, #32]
+; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sext_v8i8_v8i64:
 ; NONEON-NOSVE:       // %bb.0:
@@ -741,35 +878,65 @@ define void @sext_v8i8_v8i64(<8 x i8> %a, ptr %out) {
 }
 
 define void @sext_v16i8_v16i64(<16 x i8> %a, ptr %out) {
-; CHECK-LABEL: sext_v16i8_v16i64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    sunpklo z1.h, z0.b
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    sunpklo z0.h, z0.b
-; CHECK-NEXT:    sunpklo z2.s, z1.h
-; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT:    sunpklo z3.s, z0.h
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    sunpklo z4.d, z2.s
-; CHECK-NEXT:    sunpklo z1.s, z1.h
-; CHECK-NEXT:    ext z2.b, z2.b, z0.b, #8
-; CHECK-NEXT:    sunpklo z0.s, z0.h
-; CHECK-NEXT:    sunpklo z5.d, z3.s
-; CHECK-NEXT:    sunpklo z2.d, z2.s
-; CHECK-NEXT:    ext z3.b, z3.b, z0.b, #8
-; CHECK-NEXT:    sunpklo z6.d, z1.s
-; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT:    sunpklo z7.d, z0.s
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    sunpklo z3.d, z3.s
-; CHECK-NEXT:    sunpklo z1.d, z1.s
-; CHECK-NEXT:    stp q4, q2, [x0]
-; CHECK-NEXT:    sunpklo z0.d, z0.s
-; CHECK-NEXT:    stp q6, q1, [x0, #32]
-; CHECK-NEXT:    stp q5, q3, [x0, #64]
-; CHECK-NEXT:    stp q7, q0, [x0, #96]
-; CHECK-NEXT:    ret
+; SVE-LABEL: sext_v16i8_v16i64:
+; SVE:       // %bb.0:
+; SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
+; SVE-NEXT:    sunpklo z1.h, z0.b
+; SVE-NEXT:    ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT:    sunpklo z0.h, z0.b
+; SVE-NEXT:    sunpklo z2.s, z1.h
+; SVE-NEXT:    ext z1.b, z1.b, z0.b, #8
+; SVE-NEXT:    sunpklo z3.s, z0.h
+; SVE-NEXT:    ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT:    sunpklo z4.d, z2.s
+; SVE-NEXT:    sunpklo z1.s, z1.h
+; SVE-NEXT:    ext z2.b, z2.b, z0.b, #8
+; SVE-NEXT:    sunpklo z0.s, z0.h
+; SVE-NEXT:    sunpklo z5.d, z3.s
+; SVE-NEXT:    sunpklo z2.d, z2.s
+; SVE-NEXT:    ext z3.b, z3.b, z0.b, #8
+; SVE-NEXT:    sunpklo z6.d, z1.s
+; SVE-NEXT:    ext z1.b, z1.b, z0.b, #8
+; SVE-NEXT:    sunpklo z7.d, z0.s
+; SVE-NEXT:    ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT:    sunpklo z3.d, z3.s
+; SVE-NEXT:    sunpklo z1.d, z1.s
+; SVE-NEXT:    stp q4, q2, [x0]
+; SVE-NEXT:    sunpklo z0.d, z0.s
+; SVE-NEXT:    stp q6, q1, [x0, #32]
+; SVE-NEXT:    stp q5, q3, [x0, #64]
+; SVE-NEXT:    stp q7, q0, [x0, #96]
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: sext_v16i8_v16i64:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0_z1
+; SVE2-NEXT:    ext z2.b, { z0.b, z1.b }, #8
+; SVE2-NEXT:    sunpklo z0.h, z0.b
+; SVE2-NEXT:    sunpklo z2.h, z2.b
+; SVE2-NEXT:    ext z4.b, { z0.b, z1.b }, #8
+; SVE2-NEXT:    sunpklo z0.s, z0.h
+; SVE2-NEXT:    sunpklo z5.s, z2.h
+; SVE2-NEXT:    ext z2.b, { z2.b, z3.b }, #8
+; SVE2-NEXT:    sunpklo z3.s, z4.h
+; SVE2-NEXT:    ext z7.b, { z0.b, z1.b }, #8
+; SVE2-NEXT:    sunpklo z0.d, z0.s
+; SVE2-NEXT:    sunpklo z1.s, z2.h
+; SVE2-NEXT:    ext z16.b, { z5.b, z6.b }, #8
+; SVE2-NEXT:    sunpklo z5.d, z5.s
+; SVE2-NEXT:    sunpklo z7.d, z7.s
+; SVE2-NEXT:    ext z6.b, { z3.b, z4.b }, #8
+; SVE2-NEXT:    sunpklo z3.d, z3.s
+; SVE2-NEXT:    sunpklo z16.d, z16.s
+; SVE2-NEXT:    sunpklo z4.d, z6.s
+; SVE2-NEXT:    stp q0, q7, [x0]
+; SVE2-NEXT:    ext z0.b, { z1.b, z2.b }, #8
+; SVE2-NEXT:    sunpklo z1.d, z1.s
+; SVE2-NEXT:    stp q5, q16, [x0, #64]
+; SVE2-NEXT:    sunpklo z0.d, z0.s
+; SVE2-NEXT:    stp q3, q4, [x0, #32]
+; SVE2-NEXT:    stp q1, q0, [x0, #96]
+; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sext_v16i8_v16i64:
 ; NONEON-NOSVE:       // %bb.0:
@@ -817,67 +984,125 @@ define void @sext_v16i8_v16i64(<16 x i8> %a, ptr %out) {
 }
 
 define void @sext_v32i8_v32i64(ptr %in, ptr %out) {
-; CHECK-LABEL: sext_v32i8_v32i64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldp q0, q1, [x0]
-; CHECK-NEXT:    add z0.b, z0.b, z0.b
-; CHECK-NEXT:    add z1.b, z1.b, z1.b
-; CHECK-NEXT:    mov z2.d, z0.d
-; CHECK-NEXT:    sunpklo z3.h, z1.b
-; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT:    ext z2.b, z2.b, z0.b, #8
-; CHECK-NEXT:    sunpklo z0.h, z0.b
-; CHECK-NEXT:    sunpklo z1.h, z1.b
-; CHECK-NEXT:    sunpklo z4.s, z3.h
-; CHECK-NEXT:    sunpklo z2.h, z2.b
-; CHECK-NEXT:    ext z3.b, z3.b, z0.b, #8
-; CHECK-NEXT:    sunpklo z5.s, z0.h
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    mov z7.d, z1.d
-; CHECK-NEXT:    sunpklo z16.d, z4.s
-; CHECK-NEXT:    sunpklo z1.s, z1.h
-; CHECK-NEXT:    sunpklo z6.s, z2.h
-; CHECK-NEXT:    ext z4.b, z4.b, z0.b, #8
-; CHECK-NEXT:    ext z2.b, z2.b, z0.b, #8
-; CHECK-NEXT:    ext z7.b, z7.b, z0.b, #8
-; CHECK-NEXT:    sunpklo z0.s, z0.h
-; CHECK-NEXT:    mov z17.d, z5.d
-; CHECK-NEXT:    sunpklo z3.s, z3.h
-; CHECK-NEXT:    sunpklo z5.d, z5.s
-; CHECK-NEXT:    sunpklo z20.d, z1.s
-; CHECK-NEXT:    sunpklo z4.d, z4.s
-; CHECK-NEXT:    sunpklo z2.s, z2.h
-; CHECK-NEXT:    sunpklo z7.s, z7.h
-; CHECK-NEXT:    sunpklo z18.d, z6.s
-; CHECK-NEXT:    ext z17.b, z17.b, z0.b, #8
-; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT:    ext z6.b, z6.b, z0.b, #8
-; CHECK-NEXT:    sunpklo z19.d, z3.s
-; CHECK-NEXT:    ext z3.b, z3.b, z0.b, #8
-; CHECK-NEXT:    stp q16, q4, [x1, #128]
-; CHECK-NEXT:    sunpklo z16.d, z0.s
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    sunpklo z17.d, z17.s
-; CHECK-NEXT:    mov z4.d, z7.d
-; CHECK-NEXT:    sunpklo z1.d, z1.s
-; CHECK-NEXT:    sunpklo z3.d, z3.s
-; CHECK-NEXT:    sunpklo z7.d, z7.s
-; CHECK-NEXT:    sunpklo z0.d, z0.s
-; CHECK-NEXT:    stp q5, q17, [x1]
-; CHECK-NEXT:    sunpklo z5.d, z6.s
-; CHECK-NEXT:    mov z6.d, z2.d
-; CHECK-NEXT:    stp q19, q3, [x1, #160]
-; CHECK-NEXT:    sunpklo z2.d, z2.s
-; CHECK-NEXT:    ext z4.b, z4.b, z0.b, #8
-; CHECK-NEXT:    stp q16, q0, [x1, #32]
-; CHECK-NEXT:    ext z6.b, z6.b, z0.b, #8
-; CHECK-NEXT:    stp q20, q1, [x1, #192]
-; CHECK-NEXT:    stp q18, q5, [x1, #64]
-; CHECK-NEXT:    sunpklo z1.d, z4.s
-; CHECK-NEXT:    sunpklo z3.d, z6.s
-; CHECK-NEXT:    stp q7, q1, [x1, #224]
-; CHECK-NEXT:    stp q2, q3, [x1, #96]
-; CHECK-NEXT:    ret
+; SVE-LABEL: sext_v32i8_v32i64:
+; SVE:       // %bb.0:
+; SVE-NEXT:    ldp q0, q1, [x0]
+; SVE-NEXT:    add z0.b, z0.b, z0.b
+; SVE-NEXT:    add z1.b, z1.b, z1.b
+; SVE-NEXT:    mov z2.d, z0.d
+; SVE-NEXT:    sunpklo z3.h, z1.b
+; SVE-NEXT:    ext z1.b, z1.b, z0.b, #8
+; SVE-NEXT:    ext z2.b, z2.b, z0.b, #8
+; SVE-NEXT:    sunpklo z0.h, z0.b
+; SVE-NEXT:    sunpklo z1.h, z1.b
+; SVE-NEXT:    sunpklo z4.s, z3.h
+; SVE-NEXT:    sunpklo z2.h, z2.b
+; SVE-NEXT:    ext z3.b, z3.b, z0.b, #8
+; SVE-NEXT:    sunpklo z5.s, z0.h
+; SVE-NEXT:    ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT:    mov z7.d, z1.d
+; SVE-NEXT:    sunpklo z16.d, z4.s
+; SVE-NEXT:    sunpklo z1.s, z1.h
+; SVE-NEXT:    sunpklo z6.s, z2.h
+; SVE-NEXT:    ext z4.b, z4.b, z0.b, #8
+; SVE-NEXT:    ext z2.b, z2.b, z0.b, #8
+; SVE-NEXT:    ext z7.b, z7.b, z0.b, #8
+; SVE-NEXT:    sunpklo z0.s, z0.h
+; SVE-NEXT:    mov z17.d, z5.d
+; SVE-NEXT:    sunpklo z3.s, z3.h
+; SVE-NEXT:    sunpklo z5.d, z5.s
+; SVE-NEXT:    sunpklo z20.d, z1.s
+; SVE-NEXT:    sunpklo z4.d, z4.s
+; SVE-NEXT:    sunpklo z2.s, z2.h
+; SVE-NEXT:    sunpklo z7.s, z7.h
+; SVE-NEXT:    sunpklo z18.d, z6.s
+; SVE-NEXT:    ext z17.b, z17.b, z0.b, #8
+; SVE-NEXT:    ext z1.b, z1.b, z0.b, #8
+; SVE-NEXT:    ext z6.b, z6.b, z0.b, #8
+; SVE-NEXT:    sunpklo z19.d, z3.s
+; SVE-NEXT:    ext z3.b, z3.b, z0.b, #8
+; SVE-NEXT:    stp q16, q4, [x1, #128]
+; SVE-NEXT:    sunpklo z16.d, z0.s
+; SVE-NEXT:    ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT:    sunpklo z17.d, z17.s
+; SVE-NEXT:    mov z4.d, z7.d
+; SVE-NEXT:    sunpklo z1.d, z1.s
+; SVE-NEXT:    sunpklo z3.d, z3.s
+; SVE-NEXT:    sunpklo z7.d, z7.s
+; SVE-NEXT:    sunpklo z0.d, z0.s
+; SVE-NEXT:    stp q5, q17, [x1]
+; SVE-NEXT:    sunpklo z5.d, z6.s
+; SVE-NEXT:    mov z6.d, z2.d
+; SVE-NEXT:    stp q19, q3, [x1, #160]
+; SVE-NEXT:    sunpklo z2.d, z2.s
+; SVE-NEXT:    ext z4.b, z4.b, z0.b, #8
+; SVE-NEXT:    stp q16, q0, [x1, #32]
+; SVE-NEXT:    ext z6.b, z6.b, z0.b, #8
+; SVE-NEXT:    stp q20, q1, [x1, #192]
+; SVE-NEXT:    stp q18, q5, [x1, #64]
+; SVE-NEXT:    sunpklo z1.d, z4.s
+; SVE-NEXT:    sunpklo z3.d, z6.s
+; SVE-NEXT:    stp q7, q1, [x1, #224]
+; SVE-NEXT:    stp q2, q3, [x1, #96]
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: sext_v32i8_v32i64:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    ldp q1, q0, [x0]
+; SVE2-NEXT:    add z2.b, z0.b, z0.b
+; SVE2-NEXT:    add z0.b, z1.b, z1.b
+; SVE2-NEXT:    ext z4.b, { z2.b, z3.b }, #8
+; SVE2-NEXT:    sunpklo z2.h, z2.b
+; SVE2-NEXT:    ext z5.b, { z0.b, z1.b }, #8
+; SVE2-NEXT:    sunpklo z0.h, z0.b
+; SVE2-NEXT:    sunpklo z6.h, z4.b
+; SVE2-NEXT:    sunpklo z4.h, z5.b
+; SVE2-NEXT:    ext z16.b, { z2.b, z3.b }, #8
+; SVE2-NEXT:    sunpklo z2.s, z2.h
+; SVE2-NEXT:    sunpklo z17.s, z0.h
+; SVE2-NEXT:    ext z0.b, { z0.b, z1.b }, #8
+; SVE2-NEXT:    sunpklo z19.s, z6.h
+; SVE2-NEXT:    sunpklo z21.s, z16.h
+; SVE2-NEXT:    ext z6.b, { z6.b, z7.b }, #8
+; SVE2-NEXT:    ext z7.b, { z2.b, z3.b }, #8
+; SVE2-NEXT:    sunpklo z2.d, z2.s
+; SVE2-NEXT:    sunpklo z23.s, z0.h
+; SVE2-NEXT:    ext z0.b, { z17.b, z18.b }, #8
+; SVE2-NEXT:    sunpklo z16.d, z17.s
+; SVE2-NEXT:    ext z1.b, { z4.b, z5.b }, #8
+; SVE2-NEXT:    sunpklo z4.s, z4.h
+; SVE2-NEXT:    sunpklo z3.d, z19.s
+; SVE2-NEXT:    ext z17.b, { z19.b, z20.b }, #8
+; SVE2-NEXT:    sunpklo z19.s, z6.h
+; SVE2-NEXT:    ext z6.b, { z21.b, z22.b }, #8
+; SVE2-NEXT:    sunpklo z18.d, z21.s
+; SVE2-NEXT:    sunpklo z7.d, z7.s
+; SVE2-NEXT:    sunpklo z0.d, z0.s
+; SVE2-NEXT:    str q16, [x1]
+; SVE2-NEXT:    ext z21.b, { z4.b, z5.b }, #8
+; SVE2-NEXT:    sunpklo z4.d, z4.s
+; SVE2-NEXT:    sunpklo z5.d, z17.s
+; SVE2-NEXT:    sunpklo z6.d, z6.s
+; SVE2-NEXT:    stp q2, q7, [x1, #128]
+; SVE2-NEXT:    sunpklo z2.d, z23.s
+; SVE2-NEXT:    stp q3, q5, [x1, #192]
+; SVE2-NEXT:    ext z3.b, { z23.b, z24.b }, #8
+; SVE2-NEXT:    stp q18, q6, [x1, #160]
+; SVE2-NEXT:    sunpklo z17.s, z1.h
+; SVE2-NEXT:    sunpklo z1.d, z21.s
+; SVE2-NEXT:    stp q0, q2, [x1, #16]
+; SVE2-NEXT:    ext z2.b, { z19.b, z20.b }, #8
+; SVE2-NEXT:    sunpklo z3.d, z3.s
+; SVE2-NEXT:    ext z0.b, { z17.b, z18.b }, #8
+; SVE2-NEXT:    stp q4, q1, [x1, #64]
+; SVE2-NEXT:    sunpklo z4.d, z19.s
+; SVE2-NEXT:    sunpklo z2.d, z2.s
+; SVE2-NEXT:    sunpklo z5.d, z17.s
+; SVE2-NEXT:    str q3, [x1, #48]
+; SVE2-NEXT:    sunpklo z0.d, z0.s
+; SVE2-NEXT:    stp q4, q2, [x1, #224]
+; SVE2-NEXT:    stp q5, q0, [x1, #96]
+; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sext_v32i8_v32i64:
 ; NONEON-NOSVE:       // %bb.0:
@@ -1054,14 +1279,23 @@ define void @sext_v32i8_v32i64(ptr %in, ptr %out) {
 ;
 
 define void @sext_v8i16_v8i32(<8 x i16> %a, ptr %out) {
-; CHECK-LABEL: sext_v8i16_v8i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    sunpklo z1.s, z0.h
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    sunpklo z0.s, z0.h
-; CHECK-NEXT:    stp q1, q0, [x0]
-; CHECK-NEXT:    ret
+; SVE-LABEL: sext_v8i16_v8i32:
+; SVE:       // %bb.0:
+; SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
+; SVE-NEXT:    sunpklo z1.s, z0.h
+; SVE-NEXT:    ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT:    sunpklo z0.s, z0.h
+; SVE-NEXT:    stp q1, q0, [x0]
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: sext_v8i16_v8i32:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0_z1
+; SVE2-NEXT:    ext z2.b, { z0.b, z1.b }, #8
+; SVE2-NEXT:    sunpklo z0.s, z0.h
+; SVE2-NEXT:    sunpklo z1.s, z2.h
+; SVE2-NEXT:    stp q0, q1, [x0]
+; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sext_v8i16_v8i32:
 ; NONEON-NOSVE:       // %bb.0:
@@ -1091,20 +1325,35 @@ define void @sext_v8i16_v8i32(<8 x i16> %a, ptr %out) {
 }
 
 define void @sext_v16i16_v16i32(ptr %in, ptr %out) {
-; CHECK-LABEL: sext_v16i16_v16i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldp q1, q0, [x0]
-; CHECK-NEXT:    add z0.h, z0.h, z0.h
-; CHECK-NEXT:    add z1.h, z1.h, z1.h
-; CHECK-NEXT:    sunpklo z2.s, z0.h
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    sunpklo z3.s, z1.h
-; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT:    sunpklo z0.s, z0.h
-; CHECK-NEXT:    sunpklo z1.s, z1.h
-; CHECK-NEXT:    stp q2, q0, [x1, #32]
-; CHECK-NEXT:    stp q3, q1, [x1]
-; CHECK-NEXT:    ret
+; SVE-LABEL: sext_v16i16_v16i32:
+; SVE:       // %bb.0:
+; SVE-NEXT:    ldp q1, q0, [x0]
+; SVE-NEXT:    add z0.h, z0.h, z0.h
+; SVE-NEXT:    add z1.h, z1.h, z1.h
+; SVE-NEXT:    sunpklo z2.s, z0.h
+; SVE-NEXT:    ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT:    sunpklo z3.s, z1.h
+; SVE-NEXT:    ext z1.b, z1.b, z0.b, #8
+; SVE-NEXT:    sunpklo z0.s, z0.h
+; SVE-NEXT:    sunpklo z1.s, z1.h
+; SVE-NEXT:    stp q2, q0, [x1, #32]
+; SVE-NEXT:    stp q3, q1, [x1]
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: sext_v16i16_v16i32:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    ldp q1, q0, [x0]
+; SVE2-NEXT:    add z2.h, z0.h, z0.h
+; SVE2-NEXT:    add z0.h, z1.h, z1.h
+; SVE2-NEXT:    ext z4.b, { z2.b, z3.b }, #8
+; SVE2-NEXT:    ext z5.b, { z0.b, z1.b }, #8
+; SVE2-NEXT:    sunpklo z2.s, z2.h
+; SVE2-NEXT:    sunpklo z0.s, z0.h
+; SVE2-NEXT:    sunpklo z3.s, z4.h
+; SVE2-NEXT:    sunpklo z1.s, z5.h
+; SVE2-NEXT:    stp q0, q1, [x1]
+; SVE2-NEXT:    stp q2, q3, [x1, #32]
+; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sext_v16i16_v16i32:
 ; NONEON-NOSVE:       // %bb.0:
@@ -1185,15 +1434,25 @@ define void @sext_v16i16_v16i32(ptr %in, ptr %out) {
 ;
 
 define void @sext_v4i16_v4i64(<4 x i16> %a, ptr %out) {
-; CHECK-LABEL: sext_v4i16_v4i64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    sunpklo z0.s, z0.h
-; CHECK-NEXT:    sunpklo z1.d, z0.s
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    sunpklo z0.d, z0.s
-; CHECK-NEXT:    stp q1, q0, [x0]
-; CHECK-NEXT:    ret
+; SVE-LABEL: sext_v4i16_v4i64:
+; SVE:       // %bb.0:
+; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
+; SVE-NEXT:    sunpklo z0.s, z0.h
+; SVE-NEXT:    sunpklo z1.d, z0.s
+; SVE-NEXT:    ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT:    sunpklo z0.d, z0.s
+; SVE-NEXT:    stp q1, q0, [x0]
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: sext_v4i16_v4i64:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
+; SVE2-NEXT:    sunpklo z0.s, z0.h
+; SVE2-NEXT:    ext z2.b, { z0.b, z1.b }, #8
+; SVE2-NEXT:    sunpklo z0.d, z0.s
+; SVE2-NEXT:    sunpklo z1.d, z2.s
+; SVE2-NEXT:    stp q0, q1, [x0]
+; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sext_v4i16_v4i64:
 ; NONEON-NOSVE:       // %bb.0:
@@ -1216,21 +1475,37 @@ define void @sext_v4i16_v4i64(<4 x i16> %a, ptr %out) {
 }
 
 define void @sext_v8i16_v8i64(<8 x i16> %a, ptr %out) {
-; CHECK-LABEL: sext_v8i16_v8i64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    sunpklo z1.s, z0.h
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    sunpklo z0.s, z0.h
-; CHECK-NEXT:    sunpklo z2.d, z1.s
-; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT:    sunpklo z3.d, z0.s
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    sunpklo z1.d, z1.s
-; CHECK-NEXT:    sunpklo z0.d, z0.s
-; CHECK-NEXT:    stp q2, q1, [x0]
-; CHECK-NEXT:    stp q3, q0, [x0, #32]
-; CHECK-NEXT:    ret
+; SVE-LABEL: sext_v8i16_v8i64:
+; SVE:       // %bb.0:
+; SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
+; SVE-NEXT:    sunpklo z1.s, z0.h
+; SVE-NEXT:    ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT:    sunpklo z0.s, z0.h
+; SVE-NEXT:    sunpklo z2.d, z1.s
+; SVE-NEXT:    ext z1.b, z1.b, z0.b, #8
+; SVE-NEXT:    sunpklo z3.d, z0.s
+; SVE-NEXT:    ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT:    sunpklo z1.d, z1.s
+; SVE-NEXT:    sunpklo z0.d, z0.s
+; SVE-NEXT:    stp q2, q1, [x0]
+; SVE-NEXT:    stp q3, q0, [x0, #32]
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: sext_v8i16_v8i64:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0_z1
+; SVE2-NEXT:    ext z2.b, { z0.b, z1.b }, #8
+; SVE2-NEXT:    sunpklo z0.s, z0.h
+; SVE2-NEXT:    sunpklo z2.s, z2.h
+; SVE2-NEXT:    ext z4.b, { z0.b, z1.b }, #8
+; SVE2-NEXT:    sunpklo z0.d, z0.s
+; SVE2-NEXT:    ext z5.b, { z2.b, z3.b }, #8
+; SVE2-NEXT:    sunpklo z2.d, z2.s
+; SVE2-NEXT:    sunpklo z1.d, z4.s
+; SVE2-NEXT:    sunpklo z3.d, z5.s
+; SVE2-NEXT:    stp q0, q1, [x0]
+; SVE2-NEXT:    stp q2, q3, [x0, #32]
+; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sext_v8i16_v8i64:
 ; NONEON-NOSVE:       // %bb.0:
@@ -1262,34 +1537,63 @@ define void @sext_v8i16_v8i64(<8 x i16> %a, ptr %out) {
 }
 
 define void @sext_v16i16_v16i64(ptr %in, ptr %out) {
-; CHECK-LABEL: sext_v16i16_v16i64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldp q1, q0, [x0]
-; CHECK-NEXT:    add z0.h, z0.h, z0.h
-; CHECK-NEXT:    add z1.h, z1.h, z1.h
-; CHECK-NEXT:    sunpklo z2.s, z0.h
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    sunpklo z3.s, z1.h
-; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT:    sunpklo z0.s, z0.h
-; CHECK-NEXT:    sunpklo z4.d, z2.s
-; CHECK-NEXT:    sunpklo z5.d, z3.s
-; CHECK-NEXT:    sunpklo z1.s, z1.h
-; CHECK-NEXT:    ext z2.b, z2.b, z0.b, #8
-; CHECK-NEXT:    ext z3.b, z3.b, z0.b, #8
-; CHECK-NEXT:    sunpklo z6.d, z0.s
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    sunpklo z7.d, z1.s
-; CHECK-NEXT:    sunpklo z2.d, z2.s
-; CHECK-NEXT:    sunpklo z3.d, z3.s
-; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT:    sunpklo z0.d, z0.s
-; CHECK-NEXT:    sunpklo z1.d, z1.s
-; CHECK-NEXT:    stp q5, q3, [x1]
-; CHECK-NEXT:    stp q4, q2, [x1, #64]
-; CHECK-NEXT:    stp q6, q0, [x1, #96]
-; CHECK-NEXT:    stp q7, q1, [x1, #32]
-; CHECK-NEXT:    ret
+; SVE-LABEL: sext_v16i16_v16i64:
+; SVE:       // %bb.0:
+; SVE-NEXT:    ldp q1, q0, [x0]
+; SVE-NEXT:    add z0.h, z0.h, z0.h
+; SVE-NEXT:    add z1.h, z1.h, z1.h
+; SVE-NEXT:    sunpklo z2.s, z0.h
+; SVE-NEXT:    ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT:    sunpklo z3.s, z1.h
+; SVE-NEXT:    ext z1.b, z1.b, z0.b, #8
+; SVE-NEXT:    sunpklo z0.s, z0.h
+; SVE-NEXT:    sunpklo z4.d, z2.s
+; SVE-NEXT:    sunpklo z5.d, z3.s
+; SVE-NEXT:    sunpklo z1.s, z1.h
+; SVE-NEXT:    ext z2.b, z2.b, z0.b, #8
+; SVE-NEXT:    ext z3.b, z3.b, z0.b, #8
+; SVE-NEXT:    sunpklo z6.d, z0.s
+; SVE-NEXT:    ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT:    sunpklo z7.d, z1.s
+; SVE-NEXT:    sunpklo z2.d, z2.s
+; SVE-NEXT:    sunpklo z3.d, z3.s
+; SVE-NEXT:    ext z1.b, z1.b, z0.b, #8
+; SVE-NEXT:    sunpklo z0.d, z0.s
+; SVE-NEXT:    sunpklo z1.d, z1.s
+; SVE-NEXT:    stp q5, q3, [x1]
+; SVE-NEXT:    stp q4, q2, [x1, #64]
+; SVE-NEXT:    stp q6, q0, [x1, #96]
+; SVE-NEXT:    stp q7, q1, [x1, #32]
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: sext_v16i16_v16i64:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    ldp q1, q0, [x0]
+; SVE2-NEXT:    add z2.h, z0.h, z0.h
+; SVE2-NEXT:    add z0.h, z1.h, z1.h
+; SVE2-NEXT:    ext z4.b, { z2.b, z3.b }, #8
+; SVE2-NEXT:    sunpklo z2.s, z2.h
+; SVE2-NEXT:    sunpklo z5.s, z0.h
+; SVE2-NEXT:    ext z0.b, { z0.b, z1.b }, #8
+; SVE2-NEXT:    sunpklo z16.s, z4.h
+; SVE2-NEXT:    ext z1.b, { z2.b, z3.b }, #8
+; SVE2-NEXT:    sunpklo z18.s, z0.h
+; SVE2-NEXT:    ext z0.b, { z5.b, z6.b }, #8
+; SVE2-NEXT:    sunpklo z2.d, z2.s
+; SVE2-NEXT:    sunpklo z3.d, z5.s
+; SVE2-NEXT:    sunpklo z1.d, z1.s
+; SVE2-NEXT:    sunpklo z0.d, z0.s
+; SVE2-NEXT:    ext z4.b, { z16.b, z17.b }, #8
+; SVE2-NEXT:    ext z5.b, { z18.b, z19.b }, #8
+; SVE2-NEXT:    sunpklo z6.d, z16.s
+; SVE2-NEXT:    stp q3, q0, [x1]
+; SVE2-NEXT:    sunpklo z3.d, z18.s
+; SVE2-NEXT:    stp q2, q1, [x1, #64]
+; SVE2-NEXT:    sunpklo z2.d, z4.s
+; SVE2-NEXT:    sunpklo z1.d, z5.s
+; SVE2-NEXT:    stp q3, q1, [x1, #32]
+; SVE2-NEXT:    stp q6, q2, [x1, #96]
+; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sext_v16i16_v16i64:
 ; NONEON-NOSVE:       // %bb.0:
@@ -1375,14 +1679,23 @@ define void @sext_v16i16_v16i64(ptr %in, ptr %out) {
 ;
 
 define void @sext_v4i32_v4i64(<4 x i32> %a, ptr %out) {
-; CHECK-LABEL: sext_v4i32_v4i64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    sunpklo z1.d, z0.s
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    sunpklo z0.d, z0.s
-; CHECK-NEXT:    stp q1, q0, [x0]
-; CHECK-NEXT:    ret
+; SVE-LABEL: sext_v4i32_v4i64:
+; SVE:       // %bb.0:
+; SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
+; SVE-NEXT:    sunpklo z1.d, z0.s
+; SVE-NEXT:    ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT:    sunpklo z0.d, z0.s
+; SVE-NEXT:    stp q1, q0, [x0]
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: sext_v4i32_v4i64:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0_z1
+; SVE2-NEXT:    ext z2.b, { z0.b, z1.b }, #8
+; SVE2-NEXT:    sunpklo z0.d, z0.s
+; SVE2-NEXT:    sunpklo z1.d, z2.s
+; SVE2-NEXT:    stp q0, q1, [x0]
+; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sext_v4i32_v4i64:
 ; NONEON-NOSVE:       // %bb.0:
@@ -1404,20 +1717,35 @@ define void @sext_v4i32_v4i64(<4 x i32> %a, ptr %out) {
 }
 
 define void @sext_v8i32_v8i64(ptr %in, ptr %out) {
-; CHECK-LABEL: sext_v8i32_v8i64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldp q1, q0, [x0]
-; CHECK-NEXT:    add z0.s, z0.s, z0.s
-; CHECK-NEXT:    add z1.s, z1.s, z1.s
-; CHECK-NEXT:    sunpklo z2.d, z0.s
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    sunpklo z3.d, z1.s
-; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT:    sunpklo z0.d, z0.s
-; CHECK-NEXT:    sunpklo z1.d, z1.s
-; CHECK-NEXT:    stp q2, q0, [x1, #32]
-; CHECK-NEXT:    stp q3, q1, [x1]
-; CHECK-NEXT:    ret
+; SVE-LABEL: sext_v8i32_v8i64:
+; SVE:       // %bb.0:
+; SVE-NEXT:    ldp q1, q0, [x0]
+; SVE-NEXT:    add z0.s, z0.s, z0.s
+; SVE-NEXT:    add z1.s, z1.s, z1.s
+; SVE-NEXT:    sunpklo z2.d, z0.s
+; SVE-NEXT:    ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT:    sunpklo z3.d, z1.s
+; SVE-NEXT:    ext z1.b, z1.b, z0.b, #8
+; SVE-NEXT:    sunpklo z0.d, z0.s
+; SVE-NEXT:    sunpklo z1.d, z1.s
+; SVE-NEXT:    stp q2, q0, [x1, #32]
+; SVE-NEXT:    stp q3, q1, [x1]
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: sext_v8i32_v8i64:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    ldp q1, q0, [x0]
+; SVE2-NEXT:    add z2.s, z0.s, z0.s
+; SVE2-NEXT:    add z0.s, z1.s, z1.s
+; SVE2-NEXT:    ext z4.b, { z2.b, z3.b }, #8
+; SVE2-NEXT:    ext z5.b, { z0.b, z1.b }, #8
+; SVE2-NEXT:    sunpklo z2.d, z2.s
+; SVE2-NEXT:    sunpklo z0.d, z0.s
+; SVE2-NEXT:    sunpklo z3.d, z4.s
+; SVE2-NEXT:    sunpklo z1.d, z5.s
+; SVE2-NEXT:    stp q0, q1, [x1]
+; SVE2-NEXT:    stp q2, q3, [x1, #32]
+; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sext_v8i32_v8i64:
 ; NONEON-NOSVE:       // %bb.0:
@@ -1466,14 +1794,23 @@ define void @sext_v8i32_v8i64(ptr %in, ptr %out) {
 ;
 
 define void @zext_v16i8_v16i16(<16 x i8> %a, ptr %out) {
-; CHECK-LABEL: zext_v16i8_v16i16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    uunpklo z1.h, z0.b
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z0.h, z0.b
-; CHECK-NEXT:    stp q1, q0, [x0]
-; CHECK-NEXT:    ret
+; SVE-LABEL: zext_v16i8_v16i16:
+; SVE:       // %bb.0:
+; SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
+; SVE-NEXT:    uunpklo z1.h, z0.b
+; SVE-NEXT:    ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT:    uunpklo z0.h, z0.b
+; SVE-NEXT:    stp q1, q0, [x0]
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: zext_v16i8_v16i16:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0_z1
+; SVE2-NEXT:    ext z2.b, { z0.b, z1.b }, #8
+; SVE2-NEXT:    uunpklo z0.h, z0.b
+; SVE2-NEXT:    uunpklo z1.h, z2.b
+; SVE2-NEXT:    stp q0, q1, [x0]
+; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: zext_v16i8_v16i16:
 ; NONEON-NOSVE:       // %bb.0:
@@ -1524,20 +1861,35 @@ define void @zext_v16i8_v16i16(<16 x i8> %a, ptr %out) {
 
 ; NOTE: Extra 'add' is to prevent the extend being combined with the load.
 define void @zext_v32i8_v32i16(ptr %in, ptr %out) {
-; CHECK-LABEL: zext_v32i8_v32i16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldp q1, q0, [x0]
-; CHECK-NEXT:    add z0.b, z0.b, z0.b
-; CHECK-NEXT:    add z1.b, z1.b, z1.b
-; CHECK-NEXT:    uunpklo z2.h, z0.b
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z3.h, z1.b
-; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z0.h, z0.b
-; CHECK-NEXT:    uunpklo z1.h, z1.b
-; CHECK-NEXT:    stp q2, q0, [x1, #32]
-; CHECK-NEXT:    stp q3, q1, [x1]
-; CHECK-NEXT:    ret
+; SVE-LABEL: zext_v32i8_v32i16:
+; SVE:       // %bb.0:
+; SVE-NEXT:    ldp q1, q0, [x0]
+; SVE-NEXT:    add z0.b, z0.b, z0.b
+; SVE-NEXT:    add z1.b, z1.b, z1.b
+; SVE-NEXT:    uunpklo z2.h, z0.b
+; SVE-NEXT:    ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT:    uunpklo z3.h, z1.b
+; SVE-NEXT:    ext z1.b, z1.b, z0.b, #8
+; SVE-NEXT:    uunpklo z0.h, z0.b
+; SVE-NEXT:    uunpklo z1.h, z1.b
+; SVE-NEXT:    stp q2, q0, [x1, #32]
+; SVE-NEXT:    stp q3, q1, [x1]
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: zext_v32i8_v32i16:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    ldp q1, q0, [x0]
+; SVE2-NEXT:    add z2.b, z0.b, z0.b
+; SVE2-NEXT:    add z0.b, z1.b, z1.b
+; SVE2-NEXT:    ext z4.b, { z2.b, z3.b }, #8
+; SVE2-NEXT:    ext z5.b, { z0.b, z1.b }, #8
+; SVE2-NEXT:    uunpklo z2.h, z2.b
+; SVE2-NEXT:    uunpklo z0.h, z0.b
+; SVE2-NEXT:    uunpklo z3.h, z4.b
+; SVE2-NEXT:    uunpklo z1.h, z5.b
+; SVE2-NEXT:    stp q0, q1, [x1]
+; SVE2-NEXT:    stp q2, q3, [x1, #32]
+; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: zext_v32i8_v32i16:
 ; NONEON-NOSVE:       // %bb.0:
@@ -1718,15 +2070,25 @@ define void @zext_v32i8_v32i16(ptr %in, ptr %out) {
 ;
 
 define void @zext_v8i8_v8i32(<8 x i8> %a, ptr %out) {
-; CHECK-LABEL: zext_v8i8_v8i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    uunpklo z0.h, z0.b
-; CHECK-NEXT:    uunpklo z1.s, z0.h
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z0.s, z0.h
-; CHECK-NEXT:    stp q1, q0, [x0]
-; CHECK-NEXT:    ret
+; SVE-LABEL: zext_v8i8_v8i32:
+; SVE:       // %bb.0:
+; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
+; SVE-NEXT:    uunpklo z0.h, z0.b
+; SVE-NEXT:    uunpklo z1.s, z0.h
+; SVE-NEXT:    ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT:    uunpklo z0.s, z0.h
+; SVE-NEXT:    stp q1, q0, [x0]
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: zext_v8i8_v8i32:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
+; SVE2-NEXT:    uunpklo z0.h, z0.b
+; SVE2-NEXT:    ext z2.b, { z0.b, z1.b }, #8
+; SVE2-NEXT:    uunpklo z0.s, z0.h
+; SVE2-NEXT:    uunpklo z1.s, z2.h
+; SVE2-NEXT:    stp q0, q1, [x0]
+; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: zext_v8i8_v8i32:
 ; NONEON-NOSVE:       // %bb.0:
@@ -1755,21 +2117,37 @@ define void @zext_v8i8_v8i32(<8 x i8> %a, ptr %out) {
 }
 
 define void @zext_v16i8_v16i32(<16 x i8> %a, ptr %out) {
-; CHECK-LABEL: zext_v16i8_v16i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    uunpklo z1.h, z0.b
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z0.h, z0.b
-; CHECK-NEXT:    uunpklo z2.s, z1.h
-; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z3.s, z0.h
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z1.s, z1.h
-; CHECK-NEXT:    uunpklo z0.s, z0.h
-; CHECK-NEXT:    stp q2, q1, [x0]
-; CHECK-NEXT:    stp q3, q0, [x0, #32]
-; CHECK-NEXT:    ret
+; SVE-LABEL: zext_v16i8_v16i32:
+; SVE:       // %bb.0:
+; SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
+; SVE-NEXT:    uunpklo z1.h, z0.b
+; SVE-NEXT:    ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT:    uunpklo z0.h, z0.b
+; SVE-NEXT:    uunpklo z2.s, z1.h
+; SVE-NEXT:    ext z1.b, z1.b, z0.b, #8
+; SVE-NEXT:    uunpklo z3.s, z0.h
+; SVE-NEXT:    ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT:    uunpklo z1.s, z1.h
+; SVE-NEXT:    uunpklo z0.s, z0.h
+; SVE-NEXT:    stp q2, q1, [x0]
+; SVE-NEXT:    stp q3, q0, [x0, #32]
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: zext_v16i8_v16i32:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0_z1
+; SVE2-NEXT:    ext z2.b, { z0.b, z1.b }, #8
+; SVE2-NEXT:    uunpklo z0.h, z0.b
+; SVE2-NEXT:    uunpklo z2.h, z2.b
+; SVE2-NEXT:    ext z4.b, { z0.b, z1.b }, #8
+; SVE2-NEXT:    uunpklo z0.s, z0.h
+; SVE2-NEXT:    ext z5.b, { z2.b, z3.b }, #8
+; SVE2-NEXT:    uunpklo z2.s, z2.h
+; SVE2-NEXT:    uunpklo z1.s, z4.h
+; SVE2-NEXT:    uunpklo z3.s, z5.h
+; SVE2-NEXT:    stp q0, q1, [x0]
+; SVE2-NEXT:    stp q2, q3, [x0, #32]
+; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: zext_v16i8_v16i32:
 ; NONEON-NOSVE:       // %bb.0:
@@ -1813,34 +2191,63 @@ define void @zext_v16i8_v16i32(<16 x i8> %a, ptr %out) {
 }
 
 define void @zext_v32i8_v32i32(ptr %in, ptr %out) {
-; CHECK-LABEL: zext_v32i8_v32i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldp q1, q0, [x0]
-; CHECK-NEXT:    add z0.b, z0.b, z0.b
-; CHECK-NEXT:    add z1.b, z1.b, z1.b
-; CHECK-NEXT:    uunpklo z2.h, z0.b
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z3.h, z1.b
-; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z0.h, z0.b
-; CHECK-NEXT:    uunpklo z4.s, z2.h
-; CHECK-NEXT:    uunpklo z5.s, z3.h
-; CHECK-NEXT:    uunpklo z1.h, z1.b
-; CHECK-NEXT:    ext z2.b, z2.b, z0.b, #8
-; CHECK-NEXT:    ext z3.b, z3.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z6.s, z0.h
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z7.s, z1.h
-; CHECK-NEXT:    uunpklo z2.s, z2.h
-; CHECK-NEXT:    uunpklo z3.s, z3.h
-; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z0.s, z0.h
-; CHECK-NEXT:    uunpklo z1.s, z1.h
-; CHECK-NEXT:    stp q5, q3, [x1]
-; CHECK-NEXT:    stp q4, q2, [x1, #64]
-; CHECK-NEXT:    stp q6, q0, [x1, #96]
-; CHECK-NEXT:    stp q7, q1, [x1, #32]
-; CHECK-NEXT:    ret
+; SVE-LABEL: zext_v32i8_v32i32:
+; SVE:       // %bb.0:
+; SVE-NEXT:    ldp q1, q0, [x0]
+; SVE-NEXT:    add z0.b, z0.b, z0.b
+; SVE-NEXT:    add z1.b, z1.b, z1.b
+; SVE-NEXT:    uunpklo z2.h, z0.b
+; SVE-NEXT:    ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT:    uunpklo z3.h, z1.b
+; SVE-NEXT:    ext z1.b, z1.b, z0.b, #8
+; SVE-NEXT:    uunpklo z0.h, z0.b
+; SVE-NEXT:    uunpklo z4.s, z2.h
+; SVE-NEXT:    uunpklo z5.s, z3.h
+; SVE-NEXT:    uunpklo z1.h, z1.b
+; SVE-NEXT:    ext z2.b, z2.b, z0.b, #8
+; SVE-NEXT:    ext z3.b, z3.b, z0.b, #8
+; SVE-NEXT:    uunpklo z6.s, z0.h
+; SVE-NEXT:    ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT:    uunpklo z7.s, z1.h
+; SVE-NEXT:    uunpklo z2.s, z2.h
+; SVE-NEXT:    uunpklo z3.s, z3.h
+; SVE-NEXT:    ext z1.b, z1.b, z0.b, #8
+; SVE-NEXT:    uunpklo z0.s, z0.h
+; SVE-NEXT:    uunpklo z1.s, z1.h
+; SVE-NEXT:    stp q5, q3, [x1]
+; SVE-NEXT:    stp q4, q2, [x1, #64]
+; SVE-NEXT:    stp q6, q0, [x1, #96]
+; SVE-NEXT:    stp q7, q1, [x1, #32]
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: zext_v32i8_v32i32:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    ldp q1, q0, [x0]
+; SVE2-NEXT:    add z2.b, z0.b, z0.b
+; SVE2-NEXT:    add z0.b, z1.b, z1.b
+; SVE2-NEXT:    ext z4.b, { z2.b, z3.b }, #8
+; SVE2-NEXT:    uunpklo z2.h, z2.b
+; SVE2-NEXT:    uunpklo z5.h, z0.b
+; SVE2-NEXT:    ext z0.b, { z0.b, z1.b }, #8
+; SVE2-NEXT:    uunpklo z16.h, z4.b
+; SVE2-NEXT:    ext z1.b, { z2.b, z3.b }, #8
+; SVE2-NEXT:    uunpklo z18.h, z0.b
+; SVE2-NEXT:    ext z0.b, { z5.b, z6.b }, #8
+; SVE2-NEXT:    uunpklo z2.s, z2.h
+; SVE2-NEXT:    uunpklo z3.s, z5.h
+; SVE2-NEXT:    uunpklo z1.s, z1.h
+; SVE2-NEXT:    uunpklo z0.s, z0.h
+; SVE2-NEXT:    ext z4.b, { z16.b, z17.b }, #8
+; SVE2-NEXT:    ext z5.b, { z18.b, z19.b }, #8
+; SVE2-NEXT:    uunpklo z6.s, z16.h
+; SVE2-NEXT:    stp q3, q0, [x1]
+; SVE2-NEXT:    uunpklo z3.s, z18.h
+; SVE2-NEXT:    stp q2, q1, [x1, #64]
+; SVE2-NEXT:    uunpklo z2.s, z4.h
+; SVE2-NEXT:    uunpklo z1.s, z5.h
+; SVE2-NEXT:    stp q3, q1, [x1, #32]
+; SVE2-NEXT:    stp q6, q2, [x1, #96]
+; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: zext_v32i8_v32i32:
 ; NONEON-NOSVE:       // %bb.0:
@@ -2012,16 +2419,27 @@ define void @zext_v32i8_v32i32(ptr %in, ptr %out) {
 ; extend is a two step process where the container is zero_extend_inreg'd with
 ; the result feeding a normal zero extend from halfs to doublewords.
 define void @zext_v4i8_v4i64(<4 x i8> %a, ptr %out) {
-; CHECK-LABEL: zext_v4i8_v4i64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    and z0.h, z0.h, #0xff
-; CHECK-NEXT:    uunpklo z0.s, z0.h
-; CHECK-NEXT:    uunpklo z1.d, z0.s
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z0.d, z0.s
-; CHECK-NEXT:    stp q1, q0, [x0]
-; CHECK-NEXT:    ret
+; SVE-LABEL: zext_v4i8_v4i64:
+; SVE:       // %bb.0:
+; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
+; SVE-NEXT:    and z0.h, z0.h, #0xff
+; SVE-NEXT:    uunpklo z0.s, z0.h
+; SVE-NEXT:    uunpklo z1.d, z0.s
+; SVE-NEXT:    ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT:    uunpklo z0.d, z0.s
+; SVE-NEXT:    stp q1, q0, [x0]
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: zext_v4i8_v4i64:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
+; SVE2-NEXT:    and z0.h, z0.h, #0xff
+; SVE2-NEXT:    uunpklo z0.s, z0.h
+; SVE2-NEXT:    ext z2.b, { z0.b, z1.b }, #8
+; SVE2-NEXT:    uunpklo z0.d, z0.s
+; SVE2-NEXT:    uunpklo z1.d, z2.s
+; SVE2-NEXT:    stp q0, q1, [x0]
+; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: zext_v4i8_v4i64:
 ; NONEON-NOSVE:       // %bb.0:
@@ -2046,22 +2464,39 @@ define void @zext_v4i8_v4i64(<4 x i8> %a, ptr %out) {
 }
 
 define void @zext_v8i8_v8i64(<8 x i8> %a, ptr %out) {
-; CHECK-LABEL: zext_v8i8_v8i64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    uunpklo z0.h, z0.b
-; CHECK-NEXT:    uunpklo z1.s, z0.h
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z0.s, z0.h
-; CHECK-NEXT:    uunpklo z2.d, z1.s
-; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z3.d, z0.s
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z1.d, z1.s
-; CHECK-NEXT:    uunpklo z0.d, z0.s
-; CHECK-NEXT:    stp q2, q1, [x0]
-; CHECK-NEXT:    stp q3, q0, [x0, #32]
-; CHECK-NEXT:    ret
+; SVE-LABEL: zext_v8i8_v8i64:
+; SVE:       // %bb.0:
+; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
+; SVE-NEXT:    uunpklo z0.h, z0.b
+; SVE-NEXT:    uunpklo z1.s, z0.h
+; SVE-NEXT:    ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT:    uunpklo z0.s, z0.h
+; SVE-NEXT:    uunpklo z2.d, z1.s
+; SVE-NEXT:    ext z1.b, z1.b, z0.b, #8
+; SVE-NEXT:    uunpklo z3.d, z0.s
+; SVE-NEXT:    ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT:    uunpklo z1.d, z1.s
+; SVE-NEXT:    uunpklo z0.d, z0.s
+; SVE-NEXT:    stp q2, q1, [x0]
+; SVE-NEXT:    stp q3, q0, [x0, #32]
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: zext_v8i8_v8i64:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
+; SVE2-NEXT:    uunpklo z0.h, z0.b
+; SVE2-NEXT:    ext z2.b, { z0.b, z1.b }, #8
+; SVE2-NEXT:    uunpklo z0.s, z0.h
+; SVE2-NEXT:    uunpklo z2.s, z2.h
+; SVE2-NEXT:    ext z4.b, { z0.b, z1.b }, #8
+; SVE2-NEXT:    uunpklo z0.d, z0.s
+; SVE2-NEXT:    ext z5.b, { z2.b, z3.b }, #8
+; SVE2-NEXT:    uunpklo z1.d, z4.s
+; SVE2-NEXT:    uunpklo z2.d, z2.s
+; SVE2-NEXT:    uunpklo z3.d, z5.s
+; SVE2-NEXT:    stp q0, q1, [x0]
+; SVE2-NEXT:    stp q2, q3, [x0, #32]
+; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: zext_v8i8_v8i64:
 ; NONEON-NOSVE:       // %bb.0:
@@ -2096,35 +2531,65 @@ define void @zext_v8i8_v8i64(<8 x i8> %a, ptr %out) {
 }
 
 define void @zext_v16i8_v16i64(<16 x i8> %a, ptr %out) {
-; CHECK-LABEL: zext_v16i8_v16i64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    uunpklo z1.h, z0.b
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z0.h, z0.b
-; CHECK-NEXT:    uunpklo z2.s, z1.h
-; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z3.s, z0.h
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z4.d, z2.s
-; CHECK-NEXT:    uunpklo z1.s, z1.h
-; CHECK-NEXT:    ext z2.b, z2.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z0.s, z0.h
-; CHECK-NEXT:    uunpklo z5.d, z3.s
-; CHECK-NEXT:    uunpklo z2.d, z2.s
-; CHECK-NEXT:    ext z3.b, z3.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z6.d, z1.s
-; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z7.d, z0.s
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z3.d, z3.s
-; CHECK-NEXT:    uunpklo z1.d, z1.s
-; CHECK-NEXT:    stp q4, q2, [x0]
-; CHECK-NEXT:    uunpklo z0.d, z0.s
-; CHECK-NEXT:    stp q6, q1, [x0, #32]
-; CHECK-NEXT:    stp q5, q3, [x0, #64]
-; CHECK-NEXT:    stp q7, q0, [x0, #96]
-; CHECK-NEXT:    ret
+; SVE-LABEL: zext_v16i8_v16i64:
+; SVE:       // %bb.0:
+; SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
+; SVE-NEXT:    uunpklo z1.h, z0.b
+; SVE-NEXT:    ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT:    uunpklo z0.h, z0.b
+; SVE-NEXT:    uunpklo z2.s, z1.h
+; SVE-NEXT:    ext z1.b, z1.b, z0.b, #8
+; SVE-NEXT:    uunpklo z3.s, z0.h
+; SVE-NEXT:    ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT:    uunpklo z4.d, z2.s
+; SVE-NEXT:    uunpklo z1.s, z1.h
+; SVE-NEXT:    ext z2.b, z2.b, z0.b, #8
+; SVE-NEXT:    uunpklo z0.s, z0.h
+; SVE-NEXT:    uunpklo z5.d, z3.s
+; SVE-NEXT:    uunpklo z2.d, z2.s
+; SVE-NEXT:    ext z3.b, z3.b, z0.b, #8
+; SVE-NEXT:    uunpklo z6.d, z1.s
+; SVE-NEXT:    ext z1.b, z1.b, z0.b, #8
+; SVE-NEXT:    uunpklo z7.d, z0.s
+; SVE-NEXT:    ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT:    uunpklo z3.d, z3.s
+; SVE-NEXT:    uunpklo z1.d, z1.s
+; SVE-NEXT:    stp q4, q2, [x0]
+; SVE-NEXT:    uunpklo z0.d, z0.s
+; SVE-NEXT:    stp q6, q1, [x0, #32]
+; SVE-NEXT:    stp q5, q3, [x0, #64]
+; SVE-NEXT:    stp q7, q0, [x0, #96]
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: zext_v16i8_v16i64:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0_z1
+; SVE2-NEXT:    ext z2.b, { z0.b, z1.b }, #8
+; SVE2-NEXT:    uunpklo z0.h, z0.b
+; SVE2-NEXT:    uunpklo z2.h, z2.b
+; SVE2-NEXT:    ext z4.b, { z0.b, z1.b }, #8
+; SVE2-NEXT:    uunpklo z0.s, z0.h
+; SVE2-NEXT:    uunpklo z5.s, z2.h
+; SVE2-NEXT:    ext z2.b, { z2.b, z3.b }, #8
+; SVE2-NEXT:    uunpklo z3.s, z4.h
+; SVE2-NEXT:    ext z7.b, { z0.b, z1.b }, #8
+; SVE2-NEXT:    uunpklo z0.d, z0.s
+; SVE2-NEXT:    uunpklo z1.s, z2.h
+; SVE2-NEXT:    ext z16.b, { z5.b, z6.b }, #8
+; SVE2-NEXT:    uunpklo z5.d, z5.s
+; SVE2-NEXT:    uunpklo z7.d, z7.s
+; SVE2-NEXT:    ext z6.b, { z3.b, z4.b }, #8
+; SVE2-NEXT:    uunpklo z3.d, z3.s
+; SVE2-NEXT:    uunpklo z16.d, z16.s
+; SVE2-NEXT:    uunpklo z4.d, z6.s
+; SVE2-NEXT:    stp q0, q7, [x0]
+; SVE2-NEXT:    ext z0.b, { z1.b, z2.b }, #8
+; SVE2-NEXT:    uunpklo z1.d, z1.s
+; SVE2-NEXT:    stp q5, q16, [x0, #64]
+; SVE2-NEXT:    uunpklo z0.d, z0.s
+; SVE2-NEXT:    stp q3, q4, [x0, #32]
+; SVE2-NEXT:    stp q1, q0, [x0, #96]
+; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: zext_v16i8_v16i64:
 ; NONEON-NOSVE:       // %bb.0:
@@ -2180,67 +2645,125 @@ define void @zext_v16i8_v16i64(<16 x i8> %a, ptr %out) {
 }
 
 define void @zext_v32i8_v32i64(ptr %in, ptr %out) {
-; CHECK-LABEL: zext_v32i8_v32i64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldp q0, q1, [x0]
-; CHECK-NEXT:    add z0.b, z0.b, z0.b
-; CHECK-NEXT:    add z1.b, z1.b, z1.b
-; CHECK-NEXT:    mov z2.d, z0.d
-; CHECK-NEXT:    uunpklo z3.h, z1.b
-; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT:    ext z2.b, z2.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z0.h, z0.b
-; CHECK-NEXT:    uunpklo z1.h, z1.b
-; CHECK-NEXT:    uunpklo z4.s, z3.h
-; CHECK-NEXT:    uunpklo z2.h, z2.b
-; CHECK-NEXT:    ext z3.b, z3.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z5.s, z0.h
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    mov z7.d, z1.d
-; CHECK-NEXT:    uunpklo z16.d, z4.s
-; CHECK-NEXT:    uunpklo z1.s, z1.h
-; CHECK-NEXT:    uunpklo z6.s, z2.h
-; CHECK-NEXT:    ext z4.b, z4.b, z0.b, #8
-; CHECK-NEXT:    ext z2.b, z2.b, z0.b, #8
-; CHECK-NEXT:    ext z7.b, z7.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z0.s, z0.h
-; CHECK-NEXT:    mov z17.d, z5.d
-; CHECK-NEXT:    uunpklo z3.s, z3.h
-; CHECK-NEXT:    uunpklo z5.d, z5.s
-; CHECK-NEXT:    uunpklo z20.d, z1.s
-; CHECK-NEXT:    uunpklo z4.d, z4.s
-; CHECK-NEXT:    uunpklo z2.s, z2.h
-; CHECK-NEXT:    uunpklo z7.s, z7.h
-; CHECK-NEXT:    uunpklo z18.d, z6.s
-; CHECK-NEXT:    ext z17.b, z17.b, z0.b, #8
-; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT:    ext z6.b, z6.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z19.d, z3.s
-; CHECK-NEXT:    ext z3.b, z3.b, z0.b, #8
-; CHECK-NEXT:    stp q16, q4, [x1, #128]
-; CHECK-NEXT:    uunpklo z16.d, z0.s
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z17.d, z17.s
-; CHECK-NEXT:    mov z4.d, z7.d
-; CHECK-NEXT:    uunpklo z1.d, z1.s
-; CHECK-NEXT:    uunpklo z3.d, z3.s
-; CHECK-NEXT:    uunpklo z7.d, z7.s
-; CHECK-NEXT:    uunpklo z0.d, z0.s
-; CHECK-NEXT:    stp q5, q17, [x1]
-; CHECK-NEXT:    uunpklo z5.d, z6.s
-; CHECK-NEXT:    mov z6.d, z2.d
-; CHECK-NEXT:    stp q19, q3, [x1, #160]
-; CHECK-NEXT:    uunpklo z2.d, z2.s
-; CHECK-NEXT:    ext z4.b, z4.b, z0.b, #8
-; CHECK-NEXT:    stp q16, q0, [x1, #32]
-; CHECK-NEXT:    ext z6.b, z6.b, z0.b, #8
-; CHECK-NEXT:    stp q20, q1, [x1, #192]
-; CHECK-NEXT:    stp q18, q5, [x1, #64]
-; CHECK-NEXT:    uunpklo z1.d, z4.s
-; CHECK-NEXT:    uunpklo z3.d, z6.s
-; CHECK-NEXT:    stp q7, q1, [x1, #224]
-; CHECK-NEXT:    stp q2, q3, [x1, #96]
-; CHECK-NEXT:    ret
+; SVE-LABEL: zext_v32i8_v32i64:
+; SVE:       // %bb.0:
+; SVE-NEXT:    ldp q0, q1, [x0]
+; SVE-NEXT:    add z0.b, z0.b, z0.b
+; SVE-NEXT:    add z1.b, z1.b, z1.b
+; SVE-NEXT:    mov z2.d, z0.d
+; SVE-NEXT:    uunpklo z3.h, z1.b
+; SVE-NEXT:    ext z1.b, z1.b, z0.b, #8
+; SVE-NEXT:    ext z2.b, z2.b, z0.b, #8
+; SVE-NEXT:    uunpklo z0.h, z0.b
+; SVE-NEXT:    uunpklo z1.h, z1.b
+; SVE-NEXT:    uunpklo z4.s, z3.h
+; SVE-NEXT:    uunpklo z2.h, z2.b
+; SVE-NEXT:    ext z3.b, z3.b, z0.b, #8
+; SVE-NEXT:    uunpklo z5.s, z0.h
+; SVE-NEXT:    ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT:    mov z7.d, z1.d
+; SVE-NEXT:    uunpklo z16.d, z4.s
+; SVE-NEXT:    uunpklo z1.s, z1.h
+; SVE-NEXT:    uunpklo z6.s, z2.h
+; SVE-NEXT:    ext z4.b, z4.b, z0.b, #8
+; SVE-NEXT:    ext z2.b, z2.b, z0.b, #8
+; SVE-NEXT:    ext z7.b, z7.b, z0.b, #8
+; SVE-NEXT:    uunpklo z0.s, z0.h
+; SVE-NEXT:    mov z17.d, z5.d
+; SVE-NEXT:    uunpklo z3.s, z3.h
+; SVE-NEXT:    uunpklo z5.d, z5.s
+; SVE-NEXT:    uunpklo z20.d, z1.s
+; SVE-NEXT:    uunpklo z4.d, z4.s
+; SVE-NEXT:    uunpklo z2.s, z2.h
+; SVE-NEXT:    uunpklo z7.s, z7.h
+; SVE-NEXT:    uunpklo z18.d, z6.s
+; SVE-NEXT:    ext z17.b, z17.b, z0.b, #8
+; SVE-NEXT:    ext z1.b, z1.b, z0.b, #8
+; SVE-NEXT:    ext z6.b, z6.b, z0.b, #8
+; SVE-NEXT:    uunpklo z19.d, z3.s
+; SVE-NEXT:    ext z3.b, z3.b, z0.b, #8
+; SVE-NEXT:    stp q16, q4, [x1, #128]
+; SVE-NEXT:    uunpklo z16.d, z0.s
+; SVE-NEXT:    ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT:    uunpklo z17.d, z17.s
+; SVE-NEXT:    mov z4.d, z7.d
+; SVE-NEXT:    uunpklo z1.d, z1.s
+; SVE-NEXT:    uunpklo z3.d, z3.s
+; SVE-NEXT:    uunpklo z7.d, z7.s
+; SVE-NEXT:    uunpklo z0.d, z0.s
+; SVE-NEXT:    stp q5, q17, [x1]
+; SVE-NEXT:    uunpklo z5.d, z6.s
+; SVE-NEXT:    mov z6.d, z2.d
+; SVE-NEXT:    stp q19, q3, [x1, #160]
+; SVE-NEXT:    uunpklo z2.d, z2.s
+; SVE-NEXT:    ext z4.b, z4.b, z0.b, #8
+; SVE-NEXT:    stp q16, q0, [x1, #32]
+; SVE-NEXT:    ext z6.b, z6.b, z0.b, #8
+; SVE-NEXT:    stp q20, q1, [x1, #192]
+; SVE-NEXT:    stp q18, q5, [x1, #64]
+; SVE-NEXT:    uunpklo z1.d, z4.s
+; SVE-NEXT:    uunpklo z3.d, z6.s
+; SVE-NEXT:    stp q7, q1, [x1, #224]
+; SVE-NEXT:    stp q2, q3, [x1, #96]
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: zext_v32i8_v32i64:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    ldp q1, q0, [x0]
+; SVE2-NEXT:    add z2.b, z0.b, z0.b
+; SVE2-NEXT:    add z0.b, z1.b, z1.b
+; SVE2-NEXT:    ext z4.b, { z2.b, z3.b }, #8
+; SVE2-NEXT:    uunpklo z2.h, z2.b
+; SVE2-NEXT:    ext z5.b, { z0.b, z1.b }, #8
+; SVE2-NEXT:    uunpklo z0.h, z0.b
+; SVE2-NEXT:    uunpklo z6.h, z4.b
+; SVE2-NEXT:    uunpklo z4.h, z5.b
+; SVE2-NEXT:    ext z16.b, { z2.b, z3.b }, #8
+; SVE2-NEXT:    uunpklo z2.s, z2.h
+; SVE2-NEXT:    uunpklo z17.s, z0.h
+; SVE2-NEXT:    ext z0.b, { z0.b, z1.b }, #8
+; SVE2-NEXT:    uunpklo z19.s, z6.h
+; SVE2-NEXT:    uunpklo z21.s, z16.h
+; SVE2-NEXT:    ext z6.b, { z6.b, z7.b }, #8
+; SVE2-NEXT:    ext z7.b, { z2.b, z3.b }, #8
+; SVE2-NEXT:    uunpklo z2.d, z2.s
+; SVE2-NEXT:    uunpklo z23.s, z0.h
+; SVE2-NEXT:    ext z0.b, { z17.b, z18.b }, #8
+; SVE2-NEXT:    uunpklo z16.d, z17.s
+; SVE2-NEXT:    ext z1.b, { z4.b, z5.b }, #8
+; SVE2-NEXT:    uunpklo z4.s, z4.h
+; SVE2-NEXT:    uunpklo z3.d, z19.s
+; SVE2-NEXT:    ext z17.b, { z19.b, z20.b }, #8
+; SVE2-NEXT:    uunpklo z19.s, z6.h
+; SVE2-NEXT:    ext z6.b, { z21.b, z22.b }, #8
+; SVE2-NEXT:    uunpklo z18.d, z21.s
+; SVE2-NEXT:    uunpklo z7.d, z7.s
+; SVE2-NEXT:    uunpklo z0.d, z0.s
+; SVE2-NEXT:    str q16, [x1]
+; SVE2-NEXT:    ext z21.b, { z4.b, z5.b }, #8
+; SVE2-NEXT:    uunpklo z4.d, z4.s
+; SVE2-NEXT:    uunpklo z5.d, z17.s
+; SVE2-NEXT:    uunpklo z6.d, z6.s
+; SVE2-NEXT:    stp q2, q7, [x1, #128]
+; SVE2-NEXT:    uunpklo z2.d, z23.s
+; SVE2-NEXT:    stp q3, q5, [x1, #192]
+; SVE2-NEXT:    ext z3.b, { z23.b, z24.b }, #8
+; SVE2-NEXT:    stp q18, q6, [x1, #160]
+; SVE2-NEXT:    uunpklo z17.s, z1.h
+; SVE2-NEXT:    uunpklo z1.d, z21.s
+; SVE2-NEXT:    stp q0, q2, [x1, #16]
+; SVE2-NEXT:    ext z2.b, { z19.b, z20.b }, #8
+; SVE2-NEXT:    uunpklo z3.d, z3.s
+; SVE2-NEXT:    ext z0.b, { z17.b, z18.b }, #8
+; SVE2-NEXT:    stp q4, q1, [x1, #64]
+; SVE2-NEXT:    uunpklo z4.d, z19.s
+; SVE2-NEXT:    uunpklo z2.d, z2.s
+; SVE2-NEXT:    uunpklo z5.d, z17.s
+; SVE2-NEXT:    str q3, [x1, #48]
+; SVE2-NEXT:    uunpklo z0.d, z0.s
+; SVE2-NEXT:    stp q4, q2, [x1, #224]
+; SVE2-NEXT:    stp q5, q0, [x1, #96]
+; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: zext_v32i8_v32i64:
 ; NONEON-NOSVE:       // %bb.0:
@@ -2440,14 +2963,23 @@ define void @zext_v32i8_v32i64(ptr %in, ptr %out) {
 ;
 
 define void @zext_v8i16_v8i32(<8 x i16> %a, ptr %out) {
-; CHECK-LABEL: zext_v8i16_v8i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    uunpklo z1.s, z0.h
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z0.s, z0.h
-; CHECK-NEXT:    stp q1, q0, [x0]
-; CHECK-NEXT:    ret
+; SVE-LABEL: zext_v8i16_v8i32:
+; SVE:       // %bb.0:
+; SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
+; SVE-NEXT:    uunpklo z1.s, z0.h
+; SVE-NEXT:    ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT:    uunpklo z0.s, z0.h
+; SVE-NEXT:    stp q1, q0, [x0]
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: zext_v8i16_v8i32:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0_z1
+; SVE2-NEXT:    ext z2.b, { z0.b, z1.b }, #8
+; SVE2-NEXT:    uunpklo z0.s, z0.h
+; SVE2-NEXT:    uunpklo z1.s, z2.h
+; SVE2-NEXT:    stp q0, q1, [x0]
+; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: zext_v8i16_v8i32:
 ; NONEON-NOSVE:       // %bb.0:
@@ -2477,20 +3009,35 @@ define void @zext_v8i16_v8i32(<8 x i16> %a, ptr %out) {
 }
 
 define void @zext_v16i16_v16i32(ptr %in, ptr %out) {
-; CHECK-LABEL: zext_v16i16_v16i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldp q1, q0, [x0]
-; CHECK-NEXT:    add z0.h, z0.h, z0.h
-; CHECK-NEXT:    add z1.h, z1.h, z1.h
-; CHECK-NEXT:    uunpklo z2.s, z0.h
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z3.s, z1.h
-; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z0.s, z0.h
-; CHECK-NEXT:    uunpklo z1.s, z1.h
-; CHECK-NEXT:    stp q2, q0, [x1, #32]
-; CHECK-NEXT:    stp q3, q1, [x1]
-; CHECK-NEXT:    ret
+; SVE-LABEL: zext_v16i16_v16i32:
+; SVE:       // %bb.0:
+; SVE-NEXT:    ldp q1, q0, [x0]
+; SVE-NEXT:    add z0.h, z0.h, z0.h
+; SVE-NEXT:    add z1.h, z1.h, z1.h
+; SVE-NEXT:    uunpklo z2.s, z0.h
+; SVE-NEXT:    ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT:    uunpklo z3.s, z1.h
+; SVE-NEXT:    ext z1.b, z1.b, z0.b, #8
+; SVE-NEXT:    uunpklo z0.s, z0.h
+; SVE-NEXT:    uunpklo z1.s, z1.h
+; SVE-NEXT:    stp q2, q0, [x1, #32]
+; SVE-NEXT:    stp q3, q1, [x1]
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: zext_v16i16_v16i32:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    ldp q1, q0, [x0]
+; SVE2-NEXT:    add z2.h, z0.h, z0.h
+; SVE2-NEXT:    add z0.h, z1.h, z1.h
+; SVE2-NEXT:    ext z4.b, { z2.b, z3.b }, #8
+; SVE2-NEXT:    ext z5.b, { z0.b, z1.b }, #8
+; SVE2-NEXT:    uunpklo z2.s, z2.h
+; SVE2-NEXT:    uunpklo z0.s, z0.h
+; SVE2-NEXT:    uunpklo z3.s, z4.h
+; SVE2-NEXT:    uunpklo z1.s, z5.h
+; SVE2-NEXT:    stp q0, q1, [x1]
+; SVE2-NEXT:    stp q2, q3, [x1, #32]
+; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: zext_v16i16_v16i32:
 ; NONEON-NOSVE:       // %bb.0:
@@ -2571,15 +3118,25 @@ define void @zext_v16i16_v16i32(ptr %in, ptr %out) {
 ;
 
 define void @zext_v4i16_v4i64(<4 x i16> %a, ptr %out) {
-; CHECK-LABEL: zext_v4i16_v4i64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    uunpklo z0.s, z0.h
-; CHECK-NEXT:    uunpklo z1.d, z0.s
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z0.d, z0.s
-; CHECK-NEXT:    stp q1, q0, [x0]
-; CHECK-NEXT:    ret
+; SVE-LABEL: zext_v4i16_v4i64:
+; SVE:       // %bb.0:
+; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
+; SVE-NEXT:    uunpklo z0.s, z0.h
+; SVE-NEXT:    uunpklo z1.d, z0.s
+; SVE-NEXT:    ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT:    uunpklo z0.d, z0.s
+; SVE-NEXT:    stp q1, q0, [x0]
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: zext_v4i16_v4i64:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
+; SVE2-NEXT:    uunpklo z0.s, z0.h
+; SVE2-NEXT:    ext z2.b, { z0.b, z1.b }, #8
+; SVE2-NEXT:    uunpklo z0.d, z0.s
+; SVE2-NEXT:    uunpklo z1.d, z2.s
+; SVE2-NEXT:    stp q0, q1, [x0]
+; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: zext_v4i16_v4i64:
 ; NONEON-NOSVE:       // %bb.0:
@@ -2604,21 +3161,37 @@ define void @zext_v4i16_v4i64(<4 x i16> %a, ptr %out) {
 }
 
 define void @zext_v8i16_v8i64(<8 x i16> %a, ptr %out) {
-; CHECK-LABEL: zext_v8i16_v8i64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    uunpklo z1.s, z0.h
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z0.s, z0.h
-; CHECK-NEXT:    uunpklo z2.d, z1.s
-; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z3.d, z0.s
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z1.d, z1.s
-; CHECK-NEXT:    uunpklo z0.d, z0.s
-; CHECK-NEXT:    stp q2, q1, [x0]
-; CHECK-NEXT:    stp q3, q0, [x0, #32]
-; CHECK-NEXT:    ret
+; SVE-LABEL: zext_v8i16_v8i64:
+; SVE:       // %bb.0:
+; SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
+; SVE-NEXT:    uunpklo z1.s, z0.h
+; SVE-NEXT:    ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT:    uunpklo z0.s, z0.h
+; SVE-NEXT:    uunpklo z2.d, z1.s
+; SVE-NEXT:    ext z1.b, z1.b, z0.b, #8
+; SVE-NEXT:    uunpklo z3.d, z0.s
+; SVE-NEXT:    ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT:    uunpklo z1.d, z1.s
+; SVE-NEXT:    uunpklo z0.d, z0.s
+; SVE-NEXT:    stp q2, q1, [x0]
+; SVE-NEXT:    stp q3, q0, [x0, #32]
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: zext_v8i16_v8i64:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0_z1
+; SVE2-NEXT:    ext z2.b, { z0.b, z1.b }, #8
+; SVE2-NEXT:    uunpklo z0.s, z0.h
+; SVE2-NEXT:    uunpklo z2.s, z2.h
+; SVE2-NEXT:    ext z4.b, { z0.b, z1.b }, #8
+; SVE2-NEXT:    uunpklo z0.d, z0.s
+; SVE2-NEXT:    ext z5.b, { z2.b, z3.b }, #8
+; SVE2-NEXT:    uunpklo z2.d, z2.s
+; SVE2-NEXT:    uunpklo z1.d, z4.s
+; SVE2-NEXT:    uunpklo z3.d, z5.s
+; SVE2-NEXT:    stp q0, q1, [x0]
+; SVE2-NEXT:    stp q2, q3, [x0, #32]
+; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: zext_v8i16_v8i64:
 ; NONEON-NOSVE:       // %bb.0:
@@ -2654,34 +3227,63 @@ define void @zext_v8i16_v8i64(<8 x i16> %a, ptr %out) {
 }
 
 define void @zext_v16i16_v16i64(ptr %in, ptr %out) {
-; CHECK-LABEL: zext_v16i16_v16i64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldp q1, q0, [x0]
-; CHECK-NEXT:    add z0.h, z0.h, z0.h
-; CHECK-NEXT:    add z1.h, z1.h, z1.h
-; CHECK-NEXT:    uunpklo z2.s, z0.h
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z3.s, z1.h
-; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z0.s, z0.h
-; CHECK-NEXT:    uunpklo z4.d, z2.s
-; CHECK-NEXT:    uunpklo z5.d, z3.s
-; CHECK-NEXT:    uunpklo z1.s, z1.h
-; CHECK-NEXT:    ext z2.b, z2.b, z0.b, #8
-; CHECK-NEXT:    ext z3.b, z3.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z6.d, z0.s
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z7.d, z1.s
-; CHECK-NEXT:    uunpklo z2.d, z2.s
-; CHECK-NEXT:    uunpklo z3.d, z3.s
-; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z0.d, z0.s
-; CHECK-NEXT:    uunpklo z1.d, z1.s
-; CHECK-NEXT:    stp q5, q3, [x1]
-; CHECK-NEXT:    stp q4, q2, [x1, #64]
-; CHECK-NEXT:    stp q6, q0, [x1, #96]
-; CHECK-NEXT:    stp q7, q1, [x1, #32]
-; CHECK-NEXT:    ret
+; SVE-LABEL: zext_v16i16_v16i64:
+; SVE:       // %bb.0:
+; SVE-NEXT:    ldp q1, q0, [x0]
+; SVE-NEXT:    add z0.h, z0.h, z0.h
+; SVE-NEXT:    add z1.h, z1.h, z1.h
+; SVE-NEXT:    uunpklo z2.s, z0.h
+; SVE-NEXT:    ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT:    uunpklo z3.s, z1.h
+; SVE-NEXT:    ext z1.b, z1.b, z0.b, #8
+; SVE-NEXT:    uunpklo z0.s, z0.h
+; SVE-NEXT:    uunpklo z4.d, z2.s
+; SVE-NEXT:    uunpklo z5.d, z3.s
+; SVE-NEXT:    uunpklo z1.s, z1.h
+; SVE-NEXT:    ext z2.b, z2.b, z0.b, #8
+; SVE-NEXT:    ext z3.b, z3.b, z0.b, #8
+; SVE-NEXT:    uunpklo z6.d, z0.s
+; SVE-NEXT:    ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT:    uunpklo z7.d, z1.s
+; SVE-NEXT:    uunpklo z2.d, z2.s
+; SVE-NEXT:    uunpklo z3.d, z3.s
+; SVE-NEXT:    ext z1.b, z1.b, z0.b, #8
+; SVE-NEXT:    uunpklo z0.d, z0.s
+; SVE-NEXT:    uunpklo z1.d, z1.s
+; SVE-NEXT:    stp q5, q3, [x1]
+; SVE-NEXT:    stp q4, q2, [x1, #64]
+; SVE-NEXT:    stp q6, q0, [x1, #96]
+; SVE-NEXT:    stp q7, q1, [x1, #32]
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: zext_v16i16_v16i64:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    ldp q1, q0, [x0]
+; SVE2-NEXT:    add z2.h, z0.h, z0.h
+; SVE2-NEXT:    add z0.h, z1.h, z1.h
+; SVE2-NEXT:    ext z4.b, { z2.b, z3.b }, #8
+; SVE2-NEXT:    uunpklo z2.s, z2.h
+; SVE2-NEXT:    uunpklo z5.s, z0.h
+; SVE2-NEXT:    ext z0.b, { z0.b, z1.b }, #8
+; SVE2-NEXT:    uunpklo z16.s, z4.h
+; SVE2-NEXT:    ext z1.b, { z2.b, z3.b }, #8
+; SVE2-NEXT:    uunpklo z18.s, z0.h
+; SVE2-NEXT:    ext z0.b, { z5.b, z6.b }, #8
+; SVE2-NEXT:    uunpklo z2.d, z2.s
+; SVE2-NEXT:    uunpklo z3.d, z5.s
+; SVE2-NEXT:    uunpklo z1.d, z1.s
+; SVE2-NEXT:    uunpklo z0.d, z0.s
+; SVE2-NEXT:    ext z4.b, { z16.b, z17.b }, #8
+; SVE2-NEXT:    ext z5.b, { z18.b, z19.b }, #8
+; SVE2-NEXT:    uunpklo z6.d, z16.s
+; SVE2-NEXT:    stp q3, q0, [x1]
+; SVE2-NEXT:    uunpklo z3.d, z18.s
+; SVE2-NEXT:    stp q2, q1, [x1, #64]
+; SVE2-NEXT:    uunpklo z2.d, z4.s
+; SVE2-NEXT:    uunpklo z1.d, z5.s
+; SVE2-NEXT:    stp q3, q1, [x1, #32]
+; SVE2-NEXT:    stp q6, q2, [x1, #96]
+; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: zext_v16i16_v16i64:
 ; NONEON-NOSVE:       // %bb.0:
@@ -2777,14 +3379,23 @@ define void @zext_v16i16_v16i64(ptr %in, ptr %out) {
 ;
 
 define void @zext_v4i32_v4i64(<4 x i32> %a, ptr %out) {
-; CHECK-LABEL: zext_v4i32_v4i64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    uunpklo z1.d, z0.s
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z0.d, z0.s
-; CHECK-NEXT:    stp q1, q0, [x0]
-; CHECK-NEXT:    ret
+; SVE-LABEL: zext_v4i32_v4i64:
+; SVE:       // %bb.0:
+; SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
+; SVE-NEXT:    uunpklo z1.d, z0.s
+; SVE-NEXT:    ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT:    uunpklo z0.d, z0.s
+; SVE-NEXT:    stp q1, q0, [x0]
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: zext_v4i32_v4i64:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0_z1
+; SVE2-NEXT:    ext z2.b, { z0.b, z1.b }, #8
+; SVE2-NEXT:    uunpklo z0.d, z0.s
+; SVE2-NEXT:    uunpklo z1.d, z2.s
+; SVE2-NEXT:    stp q0, q1, [x0]
+; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: zext_v4i32_v4i64:
 ; NONEON-NOSVE:       // %bb.0:
@@ -2808,20 +3419,35 @@ define void @zext_v4i32_v4i64(<4 x i32> %a, ptr %out) {
 }
 
 define void @zext_v8i32_v8i64(ptr %in, ptr %out) {
-; CHECK-LABEL: zext_v8i32_v8i64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldp q1, q0, [x0]
-; CHECK-NEXT:    add z0.s, z0.s, z0.s
-; CHECK-NEXT:    add z1.s, z1.s, z1.s
-; CHECK-NEXT:    uunpklo z2.d, z0.s
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z3.d, z1.s
-; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z0.d, z0.s
-; CHECK-NEXT:    uunpklo z1.d, z1.s
-; CHECK-NEXT:    stp q2, q0, [x1, #32]
-; CHECK-NEXT:    stp q3, q1, [x1]
-; CHECK-NEXT:    ret
+; SVE-LABEL: zext_v8i32_v8i64:
+; SVE:       // %bb.0:
+; SVE-NEXT:    ldp q1, q0, [x0]
+; SVE-NEXT:    add z0.s, z0.s, z0.s
+; SVE-NEXT:    add z1.s, z1.s, z1.s
+; SVE-NEXT:    uunpklo z2.d, z0.s
+; SVE-NEXT:    ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT:    uunpklo z3.d, z1.s
+; SVE-NEXT:    ext z1.b, z1.b, z0.b, #8
+; SVE-NEXT:    uunpklo z0.d, z0.s
+; SVE-NEXT:    uunpklo z1.d, z1.s
+; SVE-NEXT:    stp q2, q0, [x1, #32]
+; SVE-NEXT:    stp q3, q1, [x1]
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: zext_v8i32_v8i64:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    ldp q1, q0, [x0]
+; SVE2-NEXT:    add z2.s, z0.s, z0.s
+; SVE2-NEXT:    add z0.s, z1.s, z1.s
+; SVE2-NEXT:    ext z4.b, { z2.b, z3.b }, #8
+; SVE2-NEXT:    ext z5.b, { z0.b, z1.b }, #8
+; SVE2-NEXT:    uunpklo z2.d, z2.s
+; SVE2-NEXT:    uunpklo z0.d, z0.s
+; SVE2-NEXT:    uunpklo z3.d, z4.s
+; SVE2-NEXT:    uunpklo z1.d, z5.s
+; SVE2-NEXT:    stp q0, q1, [x1]
+; SVE2-NEXT:    stp q2, q3, [x1, #32]
+; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: zext_v8i32_v8i64:
 ; NONEON-NOSVE:       // %bb.0:
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll
index bffef1352e44f..d880ebacee255 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll
@@ -64,18 +64,18 @@ define <8 x i8> @srem_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    sunpklo z2.h, z1.b
-; CHECK-NEXT:    sunpklo z3.h, z0.b
+; CHECK-NEXT:    sunpklo z4.h, z0.b
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    sunpklo z4.s, z2.h
-; CHECK-NEXT:    sunpklo z5.s, z3.h
-; CHECK-NEXT:    ext z2.b, z2.b, z0.b, #8
-; CHECK-NEXT:    ext z3.b, z3.b, z0.b, #8
+; CHECK-NEXT:    sunpklo z6.s, z2.h
+; CHECK-NEXT:    ext z2.b, { z2.b, z3.b }, #8
+; CHECK-NEXT:    ext z3.b, { z4.b, z5.b }, #8
+; CHECK-NEXT:    sunpklo z7.s, z4.h
 ; CHECK-NEXT:    sunpklo z2.s, z2.h
 ; CHECK-NEXT:    sunpklo z3.s, z3.h
-; CHECK-NEXT:    sdivr z4.s, p0/m, z4.s, z5.s
+; CHECK-NEXT:    sdivr z6.s, p0/m, z6.s, z7.s
 ; CHECK-NEXT:    sdivr z2.s, p0/m, z2.s, z3.s
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z3.h, z4.h, z4.h
+; CHECK-NEXT:    uzp1 z3.h, z6.h, z6.h
 ; CHECK-NEXT:    uzp1 z4.h, z2.h, z2.h
 ; CHECK-NEXT:    splice z2.h, p0, { z3.h, z4.h }
 ; CHECK-NEXT:    ptrue p0.b, vl8
@@ -139,46 +139,44 @@ define <8 x i8> @srem_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 define <16 x i8> @srem_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; CHECK-LABEL: srem_v16i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    sunpklo z2.h, z1.b
-; CHECK-NEXT:    sunpklo z3.h, z0.b
+; CHECK-NEXT:    mov z3.d, z0.d
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1_z2
+; CHECK-NEXT:    sunpklo z5.h, z1.b
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    sunpklo z4.s, z2.h
-; CHECK-NEXT:    sunpklo z5.s, z3.h
-; CHECK-NEXT:    ext z2.b, z2.b, z0.b, #8
-; CHECK-NEXT:    ext z3.b, z3.b, z0.b, #8
-; CHECK-NEXT:    sunpklo z2.s, z2.h
-; CHECK-NEXT:    sunpklo z3.s, z3.h
-; CHECK-NEXT:    sdivr z4.s, p0/m, z4.s, z5.s
-; CHECK-NEXT:    mov z5.d, z0.d
-; CHECK-NEXT:    ext z5.b, z5.b, z0.b, #8
-; CHECK-NEXT:    sunpklo z5.h, z5.b
-; CHECK-NEXT:    sunpklo z7.s, z5.h
-; CHECK-NEXT:    ext z5.b, z5.b, z0.b, #8
-; CHECK-NEXT:    sdivr z2.s, p0/m, z2.s, z3.s
-; CHECK-NEXT:    mov z3.d, z1.d
+; CHECK-NEXT:    sunpklo z16.h, z3.b
+; CHECK-NEXT:    sunpklo z0.s, z5.h
+; CHECK-NEXT:    ext z5.b, { z5.b, z6.b }, #8
+; CHECK-NEXT:    ext z6.b, { z16.b, z17.b }, #8
+; CHECK-NEXT:    sunpklo z7.s, z16.h
 ; CHECK-NEXT:    sunpklo z5.s, z5.h
-; CHECK-NEXT:    ext z3.b, z3.b, z0.b, #8
-; CHECK-NEXT:    sunpklo z3.h, z3.b
-; CHECK-NEXT:    sunpklo z6.s, z3.h
-; CHECK-NEXT:    ext z3.b, z3.b, z0.b, #8
-; CHECK-NEXT:    sunpklo z3.s, z3.h
-; CHECK-NEXT:    sdivr z6.s, p0/m, z6.s, z7.s
-; CHECK-NEXT:    sdivr z3.s, p0/m, z3.s, z5.s
-; CHECK-NEXT:    uzp1 z4.h, z4.h, z4.h
+; CHECK-NEXT:    sunpklo z6.s, z6.h
+; CHECK-NEXT:    sdivr z0.s, p0/m, z0.s, z7.s
+; CHECK-NEXT:    ext z7.b, { z3.b, z4.b }, #8
+; CHECK-NEXT:    sdivr z5.s, p0/m, z5.s, z6.s
+; CHECK-NEXT:    ext z6.b, { z1.b, z2.b }, #8
+; CHECK-NEXT:    sunpklo z16.h, z6.b
+; CHECK-NEXT:    sunpklo z6.h, z7.b
+; CHECK-NEXT:    sunpklo z18.s, z16.h
+; CHECK-NEXT:    sunpklo z19.s, z6.h
+; CHECK-NEXT:    ext z16.b, { z16.b, z17.b }, #8
+; CHECK-NEXT:    ext z6.b, { z6.b, z7.b }, #8
+; CHECK-NEXT:    sunpklo z7.s, z16.h
+; CHECK-NEXT:    uzp1 z16.h, z0.h, z0.h
+; CHECK-NEXT:    sunpklo z6.s, z6.h
+; CHECK-NEXT:    sdivr z18.s, p0/m, z18.s, z19.s
+; CHECK-NEXT:    uzp1 z17.h, z5.h, z5.h
+; CHECK-NEXT:    sdiv z6.s, p0/m, z6.s, z7.s
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z5.h, z2.h, z2.h
-; CHECK-NEXT:    uzp1 z6.h, z6.h, z6.h
-; CHECK-NEXT:    splice z2.h, p0, { z4.h, z5.h }
-; CHECK-NEXT:    uzp1 z4.b, z2.b, z2.b
-; CHECK-NEXT:    uzp1 z7.h, z3.h, z3.h
-; CHECK-NEXT:    splice z3.h, p0, { z6.h, z7.h }
+; CHECK-NEXT:    splice z0.h, p0, { z16.h, z17.h }
+; CHECK-NEXT:    uzp1 z18.h, z18.h, z18.h
+; CHECK-NEXT:    uzp1 z19.h, z6.h, z6.h
+; CHECK-NEXT:    uzp1 z6.b, z0.b, z0.b
+; CHECK-NEXT:    splice z5.h, p0, { z18.h, z19.h }
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    uzp1 z5.b, z3.b, z3.b
-; CHECK-NEXT:    splice z2.b, p0, { z4.b, z5.b }
+; CHECK-NEXT:    uzp1 z7.b, z5.b, z5.b
+; CHECK-NEXT:    splice z0.b, p0, { z6.b, z7.b }
 ; CHECK-NEXT:    ptrue p0.b, vl16
-; CHECK-NEXT:    mls z0.b, p0/m, z2.b, z1.b
+; CHECK-NEXT:    msb z0.b, p0/m, z1.b, z3.b
 ; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
@@ -277,84 +275,80 @@ define void @srem_v32i8(ptr %a, ptr %b) {
 ; CHECK-LABEL: srem_v32i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldr q0, [x0, #16]
-; CHECK-NEXT:    ldr q1, [x1, #16]
+; CHECK-NEXT:    ldr q2, [x1, #16]
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    sunpklo z3.h, z1.b
-; CHECK-NEXT:    sunpklo z4.h, z0.b
-; CHECK-NEXT:    sunpklo z2.s, z3.h
-; CHECK-NEXT:    sunpklo z5.s, z4.h
-; CHECK-NEXT:    ext z3.b, z3.b, z0.b, #8
-; CHECK-NEXT:    ext z4.b, z4.b, z0.b, #8
-; CHECK-NEXT:    sunpklo z3.s, z3.h
-; CHECK-NEXT:    sunpklo z4.s, z4.h
-; CHECK-NEXT:    sdivr z2.s, p0/m, z2.s, z5.s
-; CHECK-NEXT:    movprfx z5, z4
-; CHECK-NEXT:    sdiv z5.s, p0/m, z5.s, z3.s
-; CHECK-NEXT:    mov z3.d, z1.d
-; CHECK-NEXT:    mov z4.d, z0.d
-; CHECK-NEXT:    ext z3.b, z3.b, z0.b, #8
-; CHECK-NEXT:    ext z4.b, z4.b, z0.b, #8
-; CHECK-NEXT:    sunpklo z7.h, z3.b
-; CHECK-NEXT:    sunpklo z16.h, z4.b
-; CHECK-NEXT:    sunpklo z3.s, z7.h
-; CHECK-NEXT:    sunpklo z4.s, z16.h
-; CHECK-NEXT:    ext z7.b, z7.b, z0.b, #8
-; CHECK-NEXT:    ext z16.b, z16.b, z0.b, #8
-; CHECK-NEXT:    sunpklo z7.s, z7.h
-; CHECK-NEXT:    movprfx z6, z4
-; CHECK-NEXT:    sdiv z6.s, p0/m, z6.s, z3.s
-; CHECK-NEXT:    ldr q3, [x0]
-; CHECK-NEXT:    ldr q4, [x1]
-; CHECK-NEXT:    sunpklo z16.s, z16.h
-; CHECK-NEXT:    sunpklo z17.h, z4.b
-; CHECK-NEXT:    sunpklo z18.h, z3.b
-; CHECK-NEXT:    sdivr z7.s, p0/m, z7.s, z16.s
-; CHECK-NEXT:    sunpklo z19.s, z17.h
-; CHECK-NEXT:    sunpklo z20.s, z18.h
-; CHECK-NEXT:    ext z17.b, z17.b, z0.b, #8
-; CHECK-NEXT:    ext z18.b, z18.b, z0.b, #8
-; CHECK-NEXT:    sunpklo z17.s, z17.h
-; CHECK-NEXT:    sunpklo z18.s, z18.h
-; CHECK-NEXT:    sdivr z19.s, p0/m, z19.s, z20.s
-; CHECK-NEXT:    mov z20.d, z3.d
-; CHECK-NEXT:    ext z20.b, z20.b, z0.b, #8
-; CHECK-NEXT:    sunpklo z20.h, z20.b
-; CHECK-NEXT:    sunpklo z22.s, z20.h
-; CHECK-NEXT:    ext z20.b, z20.b, z0.b, #8
-; CHECK-NEXT:    sdivr z17.s, p0/m, z17.s, z18.s
-; CHECK-NEXT:    mov z18.d, z4.d
+; CHECK-NEXT:    sunpklo z5.h, z2.b
+; CHECK-NEXT:    sunpklo z16.h, z0.b
+; CHECK-NEXT:    sunpklo z4.s, z5.h
+; CHECK-NEXT:    ext z5.b, { z5.b, z6.b }, #8
+; CHECK-NEXT:    ext z6.b, { z16.b, z17.b }, #8
+; CHECK-NEXT:    sunpklo z7.s, z16.h
+; CHECK-NEXT:    ldr q16, [x1]
+; CHECK-NEXT:    sunpklo z5.s, z5.h
+; CHECK-NEXT:    sunpklo z6.s, z6.h
+; CHECK-NEXT:    sunpklo z23.h, z16.b
+; CHECK-NEXT:    sdivr z4.s, p0/m, z4.s, z7.s
+; CHECK-NEXT:    movprfx z7, z6
+; CHECK-NEXT:    sdiv z7.s, p0/m, z7.s, z5.s
+; CHECK-NEXT:    ext z5.b, { z2.b, z3.b }, #8
+; CHECK-NEXT:    ext z6.b, { z0.b, z1.b }, #8
+; CHECK-NEXT:    sunpklo z27.s, z23.h
+; CHECK-NEXT:    ext z23.b, { z23.b, z24.b }, #8
+; CHECK-NEXT:    sunpklo z19.h, z5.b
+; CHECK-NEXT:    sunpklo z21.h, z6.b
+; CHECK-NEXT:    sunpklo z23.s, z23.h
+; CHECK-NEXT:    sunpklo z5.s, z19.h
+; CHECK-NEXT:    sunpklo z6.s, z21.h
+; CHECK-NEXT:    ext z19.b, { z19.b, z20.b }, #8
+; CHECK-NEXT:    ext z20.b, { z21.b, z22.b }, #8
+; CHECK-NEXT:    sunpklo z19.s, z19.h
+; CHECK-NEXT:    movprfx z18, z6
+; CHECK-NEXT:    sdiv z18.s, p0/m, z18.s, z5.s
+; CHECK-NEXT:    ldr q5, [x0]
 ; CHECK-NEXT:    sunpklo z20.s, z20.h
-; CHECK-NEXT:    ext z18.b, z18.b, z0.b, #8
-; CHECK-NEXT:    sunpklo z18.h, z18.b
-; CHECK-NEXT:    sunpklo z21.s, z18.h
-; CHECK-NEXT:    ext z18.b, z18.b, z0.b, #8
-; CHECK-NEXT:    sunpklo z18.s, z18.h
-; CHECK-NEXT:    sdivr z21.s, p0/m, z21.s, z22.s
-; CHECK-NEXT:    uzp1 z22.h, z2.h, z2.h
-; CHECK-NEXT:    uzp1 z23.h, z5.h, z5.h
-; CHECK-NEXT:    uzp1 z5.h, z6.h, z6.h
-; CHECK-NEXT:    uzp1 z6.h, z7.h, z7.h
-; CHECK-NEXT:    sdivr z18.s, p0/m, z18.s, z20.s
-; CHECK-NEXT:    uzp1 z19.h, z19.h, z19.h
+; CHECK-NEXT:    sunpklo z25.h, z5.b
+; CHECK-NEXT:    sdivr z19.s, p0/m, z19.s, z20.s
+; CHECK-NEXT:    uzp1 z20.h, z4.h, z4.h
+; CHECK-NEXT:    uzp1 z21.h, z7.h, z7.h
+; CHECK-NEXT:    ext z24.b, { z25.b, z26.b }, #8
+; CHECK-NEXT:    sunpklo z28.s, z25.h
+; CHECK-NEXT:    ext z25.b, { z5.b, z6.b }, #8
+; CHECK-NEXT:    sunpklo z24.s, z24.h
+; CHECK-NEXT:    sdivr z23.s, p0/m, z23.s, z24.s
+; CHECK-NEXT:    ext z24.b, { z16.b, z17.b }, #8
+; CHECK-NEXT:    sdivr z27.s, p0/m, z27.s, z28.s
+; CHECK-NEXT:    sunpklo z28.h, z24.b
+; CHECK-NEXT:    sunpklo z24.h, z25.b
+; CHECK-NEXT:    sunpklo z26.s, z28.h
+; CHECK-NEXT:    sunpklo z30.s, z24.h
+; CHECK-NEXT:    ext z28.b, { z28.b, z29.b }, #8
+; CHECK-NEXT:    ext z24.b, { z24.b, z25.b }, #8
+; CHECK-NEXT:    sunpklo z25.s, z28.h
+; CHECK-NEXT:    sunpklo z24.s, z24.h
+; CHECK-NEXT:    sdivr z26.s, p0/m, z26.s, z30.s
+; CHECK-NEXT:    uzp1 z27.h, z27.h, z27.h
+; CHECK-NEXT:    uzp1 z28.h, z23.h, z23.h
+; CHECK-NEXT:    sdiv z24.s, p0/m, z24.s, z25.s
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z20.h, z17.h, z17.h
-; CHECK-NEXT:    splice z7.h, p0, { z22.h, z23.h }
-; CHECK-NEXT:    splice z5.h, p0, { z5.h, z6.h }
-; CHECK-NEXT:    uzp1 z16.h, z21.h, z21.h
-; CHECK-NEXT:    splice z2.h, p0, { z19.h, z20.h }
-; CHECK-NEXT:    uzp1 z6.b, z7.b, z7.b
-; CHECK-NEXT:    uzp1 z7.b, z5.b, z5.b
-; CHECK-NEXT:    uzp1 z17.h, z18.h, z18.h
-; CHECK-NEXT:    splice z16.h, p0, { z16.h, z17.h }
-; CHECK-NEXT:    uzp1 z17.b, z2.b, z2.b
+; CHECK-NEXT:    splice z4.h, p0, { z27.h, z28.h }
+; CHECK-NEXT:    splice z7.h, p0, { z20.h, z21.h }
+; CHECK-NEXT:    uzp1 z22.h, z26.h, z26.h
+; CHECK-NEXT:    uzp1 z20.b, z4.b, z4.b
+; CHECK-NEXT:    uzp1 z23.h, z24.h, z24.h
+; CHECK-NEXT:    uzp1 z24.h, z18.h, z18.h
+; CHECK-NEXT:    uzp1 z25.h, z19.h, z19.h
+; CHECK-NEXT:    splice z18.h, p0, { z22.h, z23.h }
+; CHECK-NEXT:    uzp1 z22.b, z7.b, z7.b
+; CHECK-NEXT:    splice z19.h, p0, { z24.h, z25.h }
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    splice z5.b, p0, { z6.b, z7.b }
-; CHECK-NEXT:    uzp1 z18.b, z16.b, z16.b
-; CHECK-NEXT:    splice z2.b, p0, { z17.b, z18.b }
+; CHECK-NEXT:    uzp1 z21.b, z18.b, z18.b
+; CHECK-NEXT:    uzp1 z23.b, z19.b, z19.b
+; CHECK-NEXT:    splice z4.b, p0, { z20.b, z21.b }
+; CHECK-NEXT:    splice z7.b, p0, { z22.b, z23.b }
 ; CHECK-NEXT:    ptrue p0.b, vl16
-; CHECK-NEXT:    mls z0.b, p0/m, z5.b, z1.b
-; CHECK-NEXT:    msb z2.b, p0/m, z4.b, z3.b
-; CHECK-NEXT:    stp q2, q0, [x0]
+; CHECK-NEXT:    msb z4.b, p0/m, z16.b, z5.b
+; CHECK-NEXT:    mls z0.b, p0/m, z7.b, z2.b
+; CHECK-NEXT:    stp q4, q0, [x0]
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: srem_v32i8:
@@ -586,25 +580,23 @@ define <4 x i16> @srem_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 define <8 x i16> @srem_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; CHECK-LABEL: srem_v8i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    sunpklo z2.s, z1.h
-; CHECK-NEXT:    sunpklo z3.s, z0.h
+; CHECK-NEXT:    mov z3.d, z0.d
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1_z2
+; CHECK-NEXT:    sunpklo z0.s, z1.h
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    mov z4.d, z0.d
-; CHECK-NEXT:    sdivr z2.s, p0/m, z2.s, z3.s
-; CHECK-NEXT:    mov z3.d, z1.d
-; CHECK-NEXT:    ext z4.b, z4.b, z0.b, #8
-; CHECK-NEXT:    ext z3.b, z3.b, z0.b, #8
-; CHECK-NEXT:    sunpklo z4.s, z4.h
-; CHECK-NEXT:    sunpklo z3.s, z3.h
-; CHECK-NEXT:    sdivr z3.s, p0/m, z3.s, z4.s
+; CHECK-NEXT:    sunpklo z5.s, z3.h
+; CHECK-NEXT:    ext z6.b, { z3.b, z4.b }, #8
+; CHECK-NEXT:    sunpklo z6.s, z6.h
+; CHECK-NEXT:    sdivr z0.s, p0/m, z0.s, z5.s
+; CHECK-NEXT:    ext z5.b, { z1.b, z2.b }, #8
+; CHECK-NEXT:    sunpklo z5.s, z5.h
+; CHECK-NEXT:    sdivr z5.s, p0/m, z5.s, z6.s
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z4.h, z2.h, z2.h
-; CHECK-NEXT:    uzp1 z5.h, z3.h, z3.h
-; CHECK-NEXT:    splice z2.h, p0, { z4.h, z5.h }
+; CHECK-NEXT:    uzp1 z6.h, z0.h, z0.h
+; CHECK-NEXT:    uzp1 z7.h, z5.h, z5.h
+; CHECK-NEXT:    splice z0.h, p0, { z6.h, z7.h }
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    mls z0.h, p0/m, z2.h, z1.h
+; CHECK-NEXT:    msb z0.h, p0/m, z1.h, z3.h
 ; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
@@ -662,41 +654,37 @@ define <8 x i16> @srem_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 define void @srem_v16i16(ptr %a, ptr %b) {
 ; CHECK-LABEL: srem_v16i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldp q4, q1, [x1]
+; CHECK-NEXT:    ldp q16, q2, [x1]
 ; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    ldr q0, [x0, #16]
-; CHECK-NEXT:    sunpklo z2.s, z1.h
-; CHECK-NEXT:    sunpklo z3.s, z0.h
-; CHECK-NEXT:    sunpklo z5.s, z4.h
-; CHECK-NEXT:    mov z16.d, z0.d
-; CHECK-NEXT:    sdivr z2.s, p0/m, z2.s, z3.s
-; CHECK-NEXT:    ldr q3, [x0]
-; CHECK-NEXT:    ext z16.b, z16.b, z0.b, #8
-; CHECK-NEXT:    sunpklo z6.s, z3.h
-; CHECK-NEXT:    mov z7.d, z3.d
-; CHECK-NEXT:    sunpklo z16.s, z16.h
-; CHECK-NEXT:    ext z7.b, z7.b, z0.b, #8
-; CHECK-NEXT:    sunpklo z7.s, z7.h
-; CHECK-NEXT:    sdivr z5.s, p0/m, z5.s, z6.s
-; CHECK-NEXT:    mov z6.d, z4.d
-; CHECK-NEXT:    ext z6.b, z6.b, z0.b, #8
-; CHECK-NEXT:    sunpklo z6.s, z6.h
-; CHECK-NEXT:    sdivr z6.s, p0/m, z6.s, z7.s
-; CHECK-NEXT:    mov z7.d, z1.d
-; CHECK-NEXT:    ext z7.b, z7.b, z0.b, #8
-; CHECK-NEXT:    sunpklo z7.s, z7.h
-; CHECK-NEXT:    sdivr z7.s, p0/m, z7.s, z16.s
-; CHECK-NEXT:    uzp1 z16.h, z5.h, z5.h
+; CHECK-NEXT:    sunpklo z4.s, z2.h
+; CHECK-NEXT:    sunpklo z5.s, z0.h
+; CHECK-NEXT:    sunpklo z7.s, z16.h
+; CHECK-NEXT:    ext z20.b, { z0.b, z1.b }, #8
+; CHECK-NEXT:    sdivr z4.s, p0/m, z4.s, z5.s
+; CHECK-NEXT:    ldr q5, [x0]
+; CHECK-NEXT:    sunpklo z20.s, z20.h
+; CHECK-NEXT:    sunpklo z18.s, z5.h
+; CHECK-NEXT:    ext z19.b, { z5.b, z6.b }, #8
+; CHECK-NEXT:    sunpklo z19.s, z19.h
+; CHECK-NEXT:    sdivr z7.s, p0/m, z7.s, z18.s
+; CHECK-NEXT:    ext z18.b, { z16.b, z17.b }, #8
+; CHECK-NEXT:    sunpklo z18.s, z18.h
+; CHECK-NEXT:    uzp1 z22.h, z4.h, z4.h
+; CHECK-NEXT:    sdivr z18.s, p0/m, z18.s, z19.s
+; CHECK-NEXT:    ext z19.b, { z2.b, z3.b }, #8
+; CHECK-NEXT:    sunpklo z19.s, z19.h
+; CHECK-NEXT:    sdivr z19.s, p0/m, z19.s, z20.s
+; CHECK-NEXT:    uzp1 z20.h, z7.h, z7.h
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z17.h, z6.h, z6.h
-; CHECK-NEXT:    uzp1 z5.h, z2.h, z2.h
-; CHECK-NEXT:    splice z2.h, p0, { z16.h, z17.h }
-; CHECK-NEXT:    uzp1 z6.h, z7.h, z7.h
-; CHECK-NEXT:    splice z5.h, p0, { z5.h, z6.h }
+; CHECK-NEXT:    uzp1 z21.h, z18.h, z18.h
+; CHECK-NEXT:    splice z4.h, p0, { z20.h, z21.h }
+; CHECK-NEXT:    uzp1 z23.h, z19.h, z19.h
+; CHECK-NEXT:    splice z7.h, p0, { z22.h, z23.h }
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    msb z2.h, p0/m, z4.h, z3.h
-; CHECK-NEXT:    mls z0.h, p0/m, z5.h, z1.h
-; CHECK-NEXT:    stp q2, q0, [x0]
+; CHECK-NEXT:    msb z4.h, p0/m, z16.h, z5.h
+; CHECK-NEXT:    mls z0.h, p0/m, z7.h, z2.h
+; CHECK-NEXT:    stp q4, q0, [x0]
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: srem_v16i16:
@@ -1114,18 +1102,18 @@ define <8 x i8> @urem_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    uunpklo z2.h, z1.b
-; CHECK-NEXT:    uunpklo z3.h, z0.b
+; CHECK-NEXT:    uunpklo z4.h, z0.b
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    uunpklo z4.s, z2.h
-; CHECK-NEXT:    uunpklo z5.s, z3.h
-; CHECK-NEXT:    ext z2.b, z2.b, z0.b, #8
-; CHECK-NEXT:    ext z3.b, z3.b, z0.b, #8
+; CHECK-NEXT:    uunpklo z6.s, z2.h
+; CHECK-NEXT:    ext z2.b, { z2.b, z3.b }, #8
+; CHECK-NEXT:    ext z3.b, { z4.b, z5.b }, #8
+; CHECK-NEXT:    uunpklo z7.s, z4.h
 ; CHECK-NEXT:    uunpklo z2.s, z2.h
 ; CHECK-NEXT:    uunpklo z3.s, z3.h
-; CHECK-NEXT:    udivr z4.s, p0/m, z4.s, z5.s
+; CHECK-NEXT:    udivr z6.s, p0/m, z6.s, z7.s
 ; CHECK-NEXT:    udivr z2.s, p0/m, z2.s, z3.s
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z3.h, z4.h, z4.h
+; CHECK-NEXT:    uzp1 z3.h, z6.h, z6.h
 ; CHECK-NEXT:    uzp1 z4.h, z2.h, z2.h
 ; CHECK-NEXT:    splice z2.h, p0, { z3.h, z4.h }
 ; CHECK-NEXT:    ptrue p0.b, vl8
@@ -1189,46 +1177,44 @@ define <8 x i8> @urem_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 define <16 x i8> @urem_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; CHECK-LABEL: urem_v16i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    uunpklo z2.h, z1.b
-; CHECK-NEXT:    uunpklo z3.h, z0.b
+; CHECK-NEXT:    mov z3.d, z0.d
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1_z2
+; CHECK-NEXT:    uunpklo z5.h, z1.b
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    uunpklo z4.s, z2.h
-; CHECK-NEXT:    uunpklo z5.s, z3.h
-; CHECK-NEXT:    ext z2.b, z2.b, z0.b, #8
-; CHECK-NEXT:    ext z3.b, z3.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z2.s, z2.h
-; CHECK-NEXT:    uunpklo z3.s, z3.h
-; CHECK-NEXT:    udivr z4.s, p0/m, z4.s, z5.s
-; CHECK-NEXT:    mov z5.d, z0.d
-; CHECK-NEXT:    ext z5.b, z5.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z5.h, z5.b
-; CHECK-NEXT:    uunpklo z7.s, z5.h
-; CHECK-NEXT:    ext z5.b, z5.b, z0.b, #8
-; CHECK-NEXT:    udivr z2.s, p0/m, z2.s, z3.s
-; CHECK-NEXT:    mov z3.d, z1.d
+; CHECK-NEXT:    uunpklo z16.h, z3.b
+; CHECK-NEXT:    uunpklo z0.s, z5.h
+; CHECK-NEXT:    ext z5.b, { z5.b, z6.b }, #8
+; CHECK-NEXT:    ext z6.b, { z16.b, z17.b }, #8
+; CHECK-NEXT:    uunpklo z7.s, z16.h
 ; CHECK-NEXT:    uunpklo z5.s, z5.h
-; CHECK-NEXT:    ext z3.b, z3.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z3.h, z3.b
-; CHECK-NEXT:    uunpklo z6.s, z3.h
-; CHECK-NEXT:    ext z3.b, z3.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z3.s, z3.h
-; CHECK-NEXT:    udivr z6.s, p0/m, z6.s, z7.s
-; CHECK-NEXT:    udivr z3.s, p0/m, z3.s, z5.s
-; CHECK-NEXT:    uzp1 z4.h, z4.h, z4.h
+; CHECK-NEXT:    uunpklo z6.s, z6.h
+; CHECK-NEXT:    udivr z0.s, p0/m, z0.s, z7.s
+; CHECK-NEXT:    ext z7.b, { z3.b, z4.b }, #8
+; CHECK-NEXT:    udivr z5.s, p0/m, z5.s, z6.s
+; CHECK-NEXT:    ext z6.b, { z1.b, z2.b }, #8
+; CHECK-NEXT:    uunpklo z16.h, z6.b
+; CHECK-NEXT:    uunpklo z6.h, z7.b
+; CHECK-NEXT:    uunpklo z18.s, z16.h
+; CHECK-NEXT:    uunpklo z19.s, z6.h
+; CHECK-NEXT:    ext z16.b, { z16.b, z17.b }, #8
+; CHECK-NEXT:    ext z6.b, { z6.b, z7.b }, #8
+; CHECK-NEXT:    uunpklo z7.s, z16.h
+; CHECK-NEXT:    uzp1 z16.h, z0.h, z0.h
+; CHECK-NEXT:    uunpklo z6.s, z6.h
+; CHECK-NEXT:    udivr z18.s, p0/m, z18.s, z19.s
+; CHECK-NEXT:    uzp1 z17.h, z5.h, z5.h
+; CHECK-NEXT:    udiv z6.s, p0/m, z6.s, z7.s
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z5.h, z2.h, z2.h
-; CHECK-NEXT:    uzp1 z6.h, z6.h, z6.h
-; CHECK-NEXT:    splice z2.h, p0, { z4.h, z5.h }
-; CHECK-NEXT:    uzp1 z4.b, z2.b, z2.b
-; CHECK-NEXT:    uzp1 z7.h, z3.h, z3.h
-; CHECK-NEXT:    splice z3.h, p0, { z6.h, z7.h }
+; CHECK-NEXT:    splice z0.h, p0, { z16.h, z17.h }
+; CHECK-NEXT:    uzp1 z18.h, z18.h, z18.h
+; CHECK-NEXT:    uzp1 z19.h, z6.h, z6.h
+; CHECK-NEXT:    uzp1 z6.b, z0.b, z0.b
+; CHECK-NEXT:    splice z5.h, p0, { z18.h, z19.h }
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    uzp1 z5.b, z3.b, z3.b
-; CHECK-NEXT:    splice z2.b, p0, { z4.b, z5.b }
+; CHECK-NEXT:    uzp1 z7.b, z5.b, z5.b
+; CHECK-NEXT:    splice z0.b, p0, { z6.b, z7.b }
 ; CHECK-NEXT:    ptrue p0.b, vl16
-; CHECK-NEXT:    mls z0.b, p0/m, z2.b, z1.b
+; CHECK-NEXT:    msb z0.b, p0/m, z1.b, z3.b
 ; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
@@ -1327,84 +1313,80 @@ define void @urem_v32i8(ptr %a, ptr %b) {
 ; CHECK-LABEL: urem_v32i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldr q0, [x0, #16]
-; CHECK-NEXT:    ldr q1, [x1, #16]
+; CHECK-NEXT:    ldr q2, [x1, #16]
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    uunpklo z3.h, z1.b
-; CHECK-NEXT:    uunpklo z4.h, z0.b
-; CHECK-NEXT:    uunpklo z2.s, z3.h
-; CHECK-NEXT:    uunpklo z5.s, z4.h
-; CHECK-NEXT:    ext z3.b, z3.b, z0.b, #8
-; CHECK-NEXT:    ext z4.b, z4.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z3.s, z3.h
-; CHECK-NEXT:    uunpklo z4.s, z4.h
-; CHECK-NEXT:    udivr z2.s, p0/m, z2.s, z5.s
-; CHECK-NEXT:    movprfx z5, z4
-; CHECK-NEXT:    udiv z5.s, p0/m, z5.s, z3.s
-; CHECK-NEXT:    mov z3.d, z1.d
-; CHECK-NEXT:    mov z4.d, z0.d
-; CHECK-NEXT:    ext z3.b, z3.b, z0.b, #8
-; CHECK-NEXT:    ext z4.b, z4.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z7.h, z3.b
-; CHECK-NEXT:    uunpklo z16.h, z4.b
-; CHECK-NEXT:    uunpklo z3.s, z7.h
-; CHECK-NEXT:    uunpklo z4.s, z16.h
-; CHECK-NEXT:    ext z7.b, z7.b, z0.b, #8
-; CHECK-NEXT:    ext z16.b, z16.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z7.s, z7.h
-; CHECK-NEXT:    movprfx z6, z4
-; CHECK-NEXT:    udiv z6.s, p0/m, z6.s, z3.s
-; CHECK-NEXT:    ldr q3, [x0]
-; CHECK-NEXT:    ldr q4, [x1]
-; CHECK-NEXT:    uunpklo z16.s, z16.h
-; CHECK-NEXT:    uunpklo z17.h, z4.b
-; CHECK-NEXT:    uunpklo z18.h, z3.b
-; CHECK-NEXT:    udivr z7.s, p0/m, z7.s, z16.s
-; CHECK-NEXT:    uunpklo z19.s, z17.h
-; CHECK-NEXT:    uunpklo z20.s, z18.h
-; CHECK-NEXT:    ext z17.b, z17.b, z0.b, #8
-; CHECK-NEXT:    ext z18.b, z18.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z17.s, z17.h
-; CHECK-NEXT:    uunpklo z18.s, z18.h
-; CHECK-NEXT:    udivr z19.s, p0/m, z19.s, z20.s
-; CHECK-NEXT:    mov z20.d, z3.d
-; CHECK-NEXT:    ext z20.b, z20.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z20.h, z20.b
-; CHECK-NEXT:    uunpklo z22.s, z20.h
-; CHECK-NEXT:    ext z20.b, z20.b, z0.b, #8
-; CHECK-NEXT:    udivr z17.s, p0/m, z17.s, z18.s
-; CHECK-NEXT:    mov z18.d, z4.d
+; CHECK-NEXT:    uunpklo z5.h, z2.b
+; CHECK-NEXT:    uunpklo z16.h, z0.b
+; CHECK-NEXT:    uunpklo z4.s, z5.h
+; CHECK-NEXT:    ext z5.b, { z5.b, z6.b }, #8
+; CHECK-NEXT:    ext z6.b, { z16.b, z17.b }, #8
+; CHECK-NEXT:    uunpklo z7.s, z16.h
+; CHECK-NEXT:    ldr q16, [x1]
+; CHECK-NEXT:    uunpklo z5.s, z5.h
+; CHECK-NEXT:    uunpklo z6.s, z6.h
+; CHECK-NEXT:    uunpklo z23.h, z16.b
+; CHECK-NEXT:    udivr z4.s, p0/m, z4.s, z7.s
+; CHECK-NEXT:    movprfx z7, z6
+; CHECK-NEXT:    udiv z7.s, p0/m, z7.s, z5.s
+; CHECK-NEXT:    ext z5.b, { z2.b, z3.b }, #8
+; CHECK-NEXT:    ext z6.b, { z0.b, z1.b }, #8
+; CHECK-NEXT:    uunpklo z27.s, z23.h
+; CHECK-NEXT:    ext z23.b, { z23.b, z24.b }, #8
+; CHECK-NEXT:    uunpklo z19.h, z5.b
+; CHECK-NEXT:    uunpklo z21.h, z6.b
+; CHECK-NEXT:    uunpklo z23.s, z23.h
+; CHECK-NEXT:    uunpklo z5.s, z19.h
+; CHECK-NEXT:    uunpklo z6.s, z21.h
+; CHECK-NEXT:    ext z19.b, { z19.b, z20.b }, #8
+; CHECK-NEXT:    ext z20.b, { z21.b, z22.b }, #8
+; CHECK-NEXT:    uunpklo z19.s, z19.h
+; CHECK-NEXT:    movprfx z18, z6
+; CHECK-NEXT:    udiv z18.s, p0/m, z18.s, z5.s
+; CHECK-NEXT:    ldr q5, [x0]
 ; CHECK-NEXT:    uunpklo z20.s, z20.h
-; CHECK-NEXT:    ext z18.b, z18.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z18.h, z18.b
-; CHECK-NEXT:    uunpklo z21.s, z18.h
-; CHECK-NEXT:    ext z18.b, z18.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z18.s, z18.h
-; CHECK-NEXT:    udivr z21.s, p0/m, z21.s, z22.s
-; CHECK-NEXT:    uzp1 z22.h, z2.h, z2.h
-; CHECK-NEXT:    uzp1 z23.h, z5.h, z5.h
-; CHECK-NEXT:    uzp1 z5.h, z6.h, z6.h
-; CHECK-NEXT:    uzp1 z6.h, z7.h, z7.h
-; CHECK-NEXT:    udivr z18.s, p0/m, z18.s, z20.s
-; CHECK-NEXT:    uzp1 z19.h, z19.h, z19.h
+; CHECK-NEXT:    uunpklo z25.h, z5.b
+; CHECK-NEXT:    udivr z19.s, p0/m, z19.s, z20.s
+; CHECK-NEXT:    uzp1 z20.h, z4.h, z4.h
+; CHECK-NEXT:    uzp1 z21.h, z7.h, z7.h
+; CHECK-NEXT:    ext z24.b, { z25.b, z26.b }, #8
+; CHECK-NEXT:    uunpklo z28.s, z25.h
+; CHECK-NEXT:    ext z25.b, { z5.b, z6.b }, #8
+; CHECK-NEXT:    uunpklo z24.s, z24.h
+; CHECK-NEXT:    udivr z23.s, p0/m, z23.s, z24.s
+; CHECK-NEXT:    ext z24.b, { z16.b, z17.b }, #8
+; CHECK-NEXT:    udivr z27.s, p0/m, z27.s, z28.s
+; CHECK-NEXT:    uunpklo z28.h, z24.b
+; CHECK-NEXT:    uunpklo z24.h, z25.b
+; CHECK-NEXT:    uunpklo z26.s, z28.h
+; CHECK-NEXT:    uunpklo z30.s, z24.h
+; CHECK-NEXT:    ext z28.b, { z28.b, z29.b }, #8
+; CHECK-NEXT:    ext z24.b, { z24.b, z25.b }, #8
+; CHECK-NEXT:    uunpklo z25.s, z28.h
+; CHECK-NEXT:    uunpklo z24.s, z24.h
+; CHECK-NEXT:    udivr z26.s, p0/m, z26.s, z30.s
+; CHECK-NEXT:    uzp1 z27.h, z27.h, z27.h
+; CHECK-NEXT:    uzp1 z28.h, z23.h, z23.h
+; CHECK-NEXT:    udiv z24.s, p0/m, z24.s, z25.s
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z20.h, z17.h, z17.h
-; CHECK-NEXT:    splice z7.h, p0, { z22.h, z23.h }
-; CHECK-NEXT:    splice z5.h, p0, { z5.h, z6.h }
-; CHECK-NEXT:    uzp1 z16.h, z21.h, z21.h
-; CHECK-NEXT:    splice z2.h, p0, { z19.h, z20.h }
-; CHECK-NEXT:    uzp1 z6.b, z7.b, z7.b
-; CHECK-NEXT:    uzp1 z7.b, z5.b, z5.b
-; CHECK-NEXT:    uzp1 z17.h, z18.h, z18.h
-; CHECK-NEXT:    splice z16.h, p0, { z16.h, z17.h }
-; CHECK-NEXT:    uzp1 z17.b, z2.b, z2.b
+; CHECK-NEXT:    splice z4.h, p0, { z27.h, z28.h }
+; CHECK-NEXT:    splice z7.h, p0, { z20.h, z21.h }
+; CHECK-NEXT:    uzp1 z22.h, z26.h, z26.h
+; CHECK-NEXT:    uzp1 z20.b, z4.b, z4.b
+; CHECK-NEXT:    uzp1 z23.h, z24.h, z24.h
+; CHECK-NEXT:    uzp1 z24.h, z18.h, z18.h
+; CHECK-NEXT:    uzp1 z25.h, z19.h, z19.h
+; CHECK-NEXT:    splice z18.h, p0, { z22.h, z23.h }
+; CHECK-NEXT:    uzp1 z22.b, z7.b, z7.b
+; CHECK-NEXT:    splice z19.h, p0, { z24.h, z25.h }
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    splice z5.b, p0, { z6.b, z7.b }
-; CHECK-NEXT:    uzp1 z18.b, z16.b, z16.b
-; CHECK-NEXT:    splice z2.b, p0, { z17.b, z18.b }
+; CHECK-NEXT:    uzp1 z21.b, z18.b, z18.b
+; CHECK-NEXT:    uzp1 z23.b, z19.b, z19.b
+; CHECK-NEXT:    splice z4.b, p0, { z20.b, z21.b }
+; CHECK-NEXT:    splice z7.b, p0, { z22.b, z23.b }
 ; CHECK-NEXT:    ptrue p0.b, vl16
-; CHECK-NEXT:    mls z0.b, p0/m, z5.b, z1.b
-; CHECK-NEXT:    msb z2.b, p0/m, z4.b, z3.b
-; CHECK-NEXT:    stp q2, q0, [x0]
+; CHECK-NEXT:    msb z4.b, p0/m, z16.b, z5.b
+; CHECK-NEXT:    mls z0.b, p0/m, z7.b, z2.b
+; CHECK-NEXT:    stp q4, q0, [x0]
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: urem_v32i8:
@@ -1636,25 +1618,23 @@ define <4 x i16> @urem_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 define <8 x i16> @urem_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; CHECK-LABEL: urem_v8i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    uunpklo z2.s, z1.h
-; CHECK-NEXT:    uunpklo z3.s, z0.h
+; CHECK-NEXT:    mov z3.d, z0.d
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1_z2
+; CHECK-NEXT:    uunpklo z0.s, z1.h
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    mov z4.d, z0.d
-; CHECK-NEXT:    udivr z2.s, p0/m, z2.s, z3.s
-; CHECK-NEXT:    mov z3.d, z1.d
-; CHECK-NEXT:    ext z4.b, z4.b, z0.b, #8
-; CHECK-NEXT:    ext z3.b, z3.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z4.s, z4.h
-; CHECK-NEXT:    uunpklo z3.s, z3.h
-; CHECK-NEXT:    udivr z3.s, p0/m, z3.s, z4.s
+; CHECK-NEXT:    uunpklo z5.s, z3.h
+; CHECK-NEXT:    ext z6.b, { z3.b, z4.b }, #8
+; CHECK-NEXT:    uunpklo z6.s, z6.h
+; CHECK-NEXT:    udivr z0.s, p0/m, z0.s, z5.s
+; CHECK-NEXT:    ext z5.b, { z1.b, z2.b }, #8
+; CHECK-NEXT:    uunpklo z5.s, z5.h
+; CHECK-NEXT:    udivr z5.s, p0/m, z5.s, z6.s
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z4.h, z2.h, z2.h
-; CHECK-NEXT:    uzp1 z5.h, z3.h, z3.h
-; CHECK-NEXT:    splice z2.h, p0, { z4.h, z5.h }
+; CHECK-NEXT:    uzp1 z6.h, z0.h, z0.h
+; CHECK-NEXT:    uzp1 z7.h, z5.h, z5.h
+; CHECK-NEXT:    splice z0.h, p0, { z6.h, z7.h }
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    mls z0.h, p0/m, z2.h, z1.h
+; CHECK-NEXT:    msb z0.h, p0/m, z1.h, z3.h
 ; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
@@ -1712,41 +1692,37 @@ define <8 x i16> @urem_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 define void @urem_v16i16(ptr %a, ptr %b) {
 ; CHECK-LABEL: urem_v16i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldp q4, q1, [x1]
+; CHECK-NEXT:    ldp q16, q2, [x1]
 ; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    ldr q0, [x0, #16]
-; CHECK-NEXT:    uunpklo z2.s, z1.h
-; CHECK-NEXT:    uunpklo z3.s, z0.h
-; CHECK-NEXT:    uunpklo z5.s, z4.h
-; CHECK-NEXT:    mov z16.d, z0.d
-; CHECK-NEXT:    udivr z2.s, p0/m, z2.s, z3.s
-; CHECK-NEXT:    ldr q3, [x0]
-; CHECK-NEXT:    ext z16.b, z16.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z6.s, z3.h
-; CHECK-NEXT:    mov z7.d, z3.d
-; CHECK-NEXT:    uunpklo z16.s, z16.h
-; CHECK-NEXT:    ext z7.b, z7.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z7.s, z7.h
-; CHECK-NEXT:    udivr z5.s, p0/m, z5.s, z6.s
-; CHECK-NEXT:    mov z6.d, z4.d
-; CHECK-NEXT:    ext z6.b, z6.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z6.s, z6.h
-; CHECK-NEXT:    udivr z6.s, p0/m, z6.s, z7.s
-; CHECK-NEXT:    mov z7.d, z1.d
-; CHECK-NEXT:    ext z7.b, z7.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z7.s, z7.h
-; CHECK-NEXT:    udivr z7.s, p0/m, z7.s, z16.s
-; CHECK-NEXT:    uzp1 z16.h, z5.h, z5.h
+; CHECK-NEXT:    uunpklo z4.s, z2.h
+; CHECK-NEXT:    uunpklo z5.s, z0.h
+; CHECK-NEXT:    uunpklo z7.s, z16.h
+; CHECK-NEXT:    ext z20.b, { z0.b, z1.b }, #8
+; CHECK-NEXT:    udivr z4.s, p0/m, z4.s, z5.s
+; CHECK-NEXT:    ldr q5, [x0]
+; CHECK-NEXT:    uunpklo z20.s, z20.h
+; CHECK-NEXT:    uunpklo z18.s, z5.h
+; CHECK-NEXT:    ext z19.b, { z5.b, z6.b }, #8
+; CHECK-NEXT:    uunpklo z19.s, z19.h
+; CHECK-NEXT:    udivr z7.s, p0/m, z7.s, z18.s
+; CHECK-NEXT:    ext z18.b, { z16.b, z17.b }, #8
+; CHECK-NEXT:    uunpklo z18.s, z18.h
+; CHECK-NEXT:    uzp1 z22.h, z4.h, z4.h
+; CHECK-NEXT:    udivr z18.s, p0/m, z18.s, z19.s
+; CHECK-NEXT:    ext z19.b, { z2.b, z3.b }, #8
+; CHECK-NEXT:    uunpklo z19.s, z19.h
+; CHECK-NEXT:    udivr z19.s, p0/m, z19.s, z20.s
+; CHECK-NEXT:    uzp1 z20.h, z7.h, z7.h
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z17.h, z6.h, z6.h
-; CHECK-NEXT:    uzp1 z5.h, z2.h, z2.h
-; CHECK-NEXT:    splice z2.h, p0, { z16.h, z17.h }
-; CHECK-NEXT:    uzp1 z6.h, z7.h, z7.h
-; CHECK-NEXT:    splice z5.h, p0, { z5.h, z6.h }
+; CHECK-NEXT:    uzp1 z21.h, z18.h, z18.h
+; CHECK-NEXT:    splice z4.h, p0, { z20.h, z21.h }
+; CHECK-NEXT:    uzp1 z23.h, z19.h, z19.h
+; CHECK-NEXT:    splice z7.h, p0, { z22.h, z23.h }
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    msb z2.h, p0/m, z4.h, z3.h
-; CHECK-NEXT:    mls z0.h, p0/m, z5.h, z1.h
-; CHECK-NEXT:    stp q2, q0, [x0]
+; CHECK-NEXT:    msb z4.h, p0/m, z16.h, z5.h
+; CHECK-NEXT:    mls z0.h, p0/m, z7.h, z2.h
+; CHECK-NEXT:    stp q4, q0, [x0]
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: urem_v16i16:
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-limit-duplane.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-limit-duplane.ll
index 3627390b5edfa..bfa4bc2011b48 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-limit-duplane.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-limit-duplane.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
+; RUN: llc -mattr=+sve2 -force-streaming-compatible  < %s | FileCheck %s
 ; RUN: llc -mattr=+sme -force-streaming  < %s | FileCheck %s
 ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
@@ -97,17 +97,17 @@ entry:
 define <2 x i32> @test2(ptr %arg1, ptr %arg2) {
 ; CHECK-LABEL: test2:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ldp q1, q0, [x0, #32]
-; CHECK-NEXT:    ldp q3, q4, [x0]
-; CHECK-NEXT:    add z2.s, z0.s, z0.s
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    add z1.s, z1.s, z1.s
-; CHECK-NEXT:    add z3.s, z3.s, z3.s
-; CHECK-NEXT:    add z4.s, z4.s, z4.s
-; CHECK-NEXT:    mov z0.s, s0
-; CHECK-NEXT:    stp q1, q2, [x0, #32]
-; CHECK-NEXT:    stp q3, q4, [x0]
+; CHECK-NEXT:    ldp q2, q0, [x0, #32]
+; CHECK-NEXT:    ldp q4, q5, [x0]
+; CHECK-NEXT:    ext z3.b, { z0.b, z1.b }, #8
+; CHECK-NEXT:    add z2.s, z2.s, z2.s
+; CHECK-NEXT:    add z1.s, z0.s, z0.s
+; CHECK-NEXT:    mov z0.s, s3
+; CHECK-NEXT:    add z3.s, z4.s, z4.s
+; CHECK-NEXT:    add z4.s, z5.s, z5.s
+; CHECK-NEXT:    stp q2, q1, [x0, #32]
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
+; CHECK-NEXT:    stp q3, q4, [x0]
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: test2:
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-reductions.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-reductions.ll
index 93d6da19c0c33..1caf89fefeea5 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-reductions.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-reductions.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mattr=+sve < %s | FileCheck %s
 ; RUN: llc -mattr=+dotprod,+sve < %s | FileCheck %s -check-prefix=DOT
-; RUN: llc -mattr=+dotprod,+sve -force-streaming-compatible < %s | FileCheck %s --check-prefix=STREAMING-SVE
+; RUN: llc -mattr=+dotprod,+sve2 -force-streaming-compatible < %s | FileCheck %s --check-prefix=STREAMING-SVE
 ; RUN: llc -mattr=+dotprod,+sme -force-streaming < %s | FileCheck %s --check-prefix=STREAMING-SVE
 
 target triple = "aarch64-unknown-linux-gnu"
@@ -36,34 +36,33 @@ define i32 @reduce_uaddv_v16i8(<32 x i8> %a) {
 ;
 ; STREAMING-SVE-LABEL: reduce_uaddv_v16i8:
 ; STREAMING-SVE:       // %bb.0:
-; STREAMING-SVE-NEXT:    // kill: def $q1 killed $q1 def $z1
-; STREAMING-SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
-; STREAMING-SVE-NEXT:    uunpklo z2.h, z1.b
-; STREAMING-SVE-NEXT:    uunpklo z3.h, z0.b
+; STREAMING-SVE-NEXT:    mov z3.d, z0.d
+; STREAMING-SVE-NEXT:    // kill: def $q1 killed $q1 def $z1_z2
+; STREAMING-SVE-NEXT:    ext z0.b, { z1.b, z2.b }, #8
 ; STREAMING-SVE-NEXT:    ptrue p0.s, vl4
-; STREAMING-SVE-NEXT:    ext z1.b, z1.b, z0.b, #8
-; STREAMING-SVE-NEXT:    ext z0.b, z0.b, z0.b, #8
 ; STREAMING-SVE-NEXT:    uunpklo z1.h, z1.b
-; STREAMING-SVE-NEXT:    uunpklo z0.h, z0.b
-; STREAMING-SVE-NEXT:    uunpklo z4.s, z2.h
-; STREAMING-SVE-NEXT:    uunpklo z6.s, z3.h
-; STREAMING-SVE-NEXT:    mov z5.d, z1.d
-; STREAMING-SVE-NEXT:    ext z2.b, z2.b, z0.b, #8
-; STREAMING-SVE-NEXT:    ext z3.b, z3.b, z0.b, #8
-; STREAMING-SVE-NEXT:    uunpklo z7.s, z0.h
+; STREAMING-SVE-NEXT:    ext z5.b, { z3.b, z4.b }, #8
+; STREAMING-SVE-NEXT:    uunpklo z6.h, z0.b
+; STREAMING-SVE-NEXT:    uunpklo z3.h, z3.b
+; STREAMING-SVE-NEXT:    ext z0.b, { z1.b, z2.b }, #8
 ; STREAMING-SVE-NEXT:    uunpklo z1.s, z1.h
-; STREAMING-SVE-NEXT:    add z4.s, z6.s, z4.s
-; STREAMING-SVE-NEXT:    ext z5.b, z5.b, z0.b, #8
-; STREAMING-SVE-NEXT:    ext z0.b, z0.b, z0.b, #8
-; STREAMING-SVE-NEXT:    uunpklo z2.s, z2.h
-; STREAMING-SVE-NEXT:    uunpklo z3.s, z3.h
-; STREAMING-SVE-NEXT:    add z1.s, z7.s, z1.s
-; STREAMING-SVE-NEXT:    uunpklo z5.s, z5.h
+; STREAMING-SVE-NEXT:    uunpklo z16.h, z5.b
+; STREAMING-SVE-NEXT:    ext z5.b, { z6.b, z7.b }, #8
+; STREAMING-SVE-NEXT:    ext z19.b, { z3.b, z4.b }, #8
+; STREAMING-SVE-NEXT:    uunpklo z2.s, z3.h
 ; STREAMING-SVE-NEXT:    uunpklo z0.s, z0.h
-; STREAMING-SVE-NEXT:    add z2.s, z3.s, z2.s
-; STREAMING-SVE-NEXT:    add z1.s, z4.s, z1.s
-; STREAMING-SVE-NEXT:    add z0.s, z0.s, z5.s
-; STREAMING-SVE-NEXT:    add z0.s, z2.s, z0.s
+; STREAMING-SVE-NEXT:    uunpklo z6.s, z6.h
+; STREAMING-SVE-NEXT:    ext z18.b, { z16.b, z17.b }, #8
+; STREAMING-SVE-NEXT:    uunpklo z3.s, z5.h
+; STREAMING-SVE-NEXT:    uunpklo z5.s, z19.h
+; STREAMING-SVE-NEXT:    uunpklo z7.s, z16.h
+; STREAMING-SVE-NEXT:    add z1.s, z2.s, z1.s
+; STREAMING-SVE-NEXT:    uunpklo z4.s, z18.h
+; STREAMING-SVE-NEXT:    add z0.s, z5.s, z0.s
+; STREAMING-SVE-NEXT:    add z2.s, z7.s, z6.s
+; STREAMING-SVE-NEXT:    add z3.s, z4.s, z3.s
+; STREAMING-SVE-NEXT:    add z1.s, z1.s, z2.s
+; STREAMING-SVE-NEXT:    add z0.s, z0.s, z3.s
 ; STREAMING-SVE-NEXT:    add z0.s, z1.s, z0.s
 ; STREAMING-SVE-NEXT:    uaddv d0, p0, z0.s
 ; STREAMING-SVE-NEXT:    fmov w0, s0
@@ -103,34 +102,33 @@ define i32 @reduce_saddv_v16i8(<32 x i8> %a) {
 ;
 ; STREAMING-SVE-LABEL: reduce_saddv_v16i8:
 ; STREAMING-SVE:       // %bb.0:
-; STREAMING-SVE-NEXT:    // kill: def $q1 killed $q1 def $z1
-; STREAMING-SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
-; STREAMING-SVE-NEXT:    sunpklo z2.h, z1.b
-; STREAMING-SVE-NEXT:    sunpklo z3.h, z0.b
+; STREAMING-SVE-NEXT:    mov z3.d, z0.d
+; STREAMING-SVE-NEXT:    // kill: def $q1 killed $q1 def $z1_z2
+; STREAMING-SVE-NEXT:    ext z0.b, { z1.b, z2.b }, #8
 ; STREAMING-SVE-NEXT:    ptrue p0.s, vl4
-; STREAMING-SVE-NEXT:    ext z1.b, z1.b, z0.b, #8
-; STREAMING-SVE-NEXT:    ext z0.b, z0.b, z0.b, #8
 ; STREAMING-SVE-NEXT:    sunpklo z1.h, z1.b
-; STREAMING-SVE-NEXT:    sunpklo z0.h, z0.b
-; STREAMING-SVE-NEXT:    sunpklo z4.s, z2.h
-; STREAMING-SVE-NEXT:    sunpklo z6.s, z3.h
-; STREAMING-SVE-NEXT:    mov z5.d, z1.d
-; STREAMING-SVE-NEXT:    ext z2.b, z2.b, z0.b, #8
-; STREAMING-SVE-NEXT:    ext z3.b, z3.b, z0.b, #8
-; STREAMING-SVE-NEXT:    sunpklo z7.s, z0.h
+; STREAMING-SVE-NEXT:    ext z5.b, { z3.b, z4.b }, #8
+; STREAMING-SVE-NEXT:    sunpklo z6.h, z0.b
+; STREAMING-SVE-NEXT:    sunpklo z3.h, z3.b
+; STREAMING-SVE-NEXT:    ext z0.b, { z1.b, z2.b }, #8
 ; STREAMING-SVE-NEXT:    sunpklo z1.s, z1.h
-; STREAMING-SVE-NEXT:    add z4.s, z6.s, z4.s
-; STREAMING-SVE-NEXT:    ext z5.b, z5.b, z0.b, #8
-; STREAMING-SVE-NEXT:    ext z0.b, z0.b, z0.b, #8
-; STREAMING-SVE-NEXT:    sunpklo z2.s, z2.h
-; STREAMING-SVE-NEXT:    sunpklo z3.s, z3.h
-; STREAMING-SVE-NEXT:    add z1.s, z7.s, z1.s
-; STREAMING-SVE-NEXT:    sunpklo z5.s, z5.h
+; STREAMING-SVE-NEXT:    sunpklo z16.h, z5.b
+; STREAMING-SVE-NEXT:    ext z5.b, { z6.b, z7.b }, #8
+; STREAMING-SVE-NEXT:    ext z19.b, { z3.b, z4.b }, #8
+; STREAMING-SVE-NEXT:    sunpklo z2.s, z3.h
 ; STREAMING-SVE-NEXT:    sunpklo z0.s, z0.h
-; STREAMING-SVE-NEXT:    add z2.s, z3.s, z2.s
-; STREAMING-SVE-NEXT:    add z1.s, z4.s, z1.s
-; STREAMING-SVE-NEXT:    add z0.s, z0.s, z5.s
-; STREAMING-SVE-NEXT:    add z0.s, z2.s, z0.s
+; STREAMING-SVE-NEXT:    sunpklo z6.s, z6.h
+; STREAMING-SVE-NEXT:    ext z18.b, { z16.b, z17.b }, #8
+; STREAMING-SVE-NEXT:    sunpklo z3.s, z5.h
+; STREAMING-SVE-NEXT:    sunpklo z5.s, z19.h
+; STREAMING-SVE-NEXT:    sunpklo z7.s, z16.h
+; STREAMING-SVE-NEXT:    add z1.s, z2.s, z1.s
+; STREAMING-SVE-NEXT:    sunpklo z4.s, z18.h
+; STREAMING-SVE-NEXT:    add z0.s, z5.s, z0.s
+; STREAMING-SVE-NEXT:    add z2.s, z7.s, z6.s
+; STREAMING-SVE-NEXT:    add z3.s, z4.s, z3.s
+; STREAMING-SVE-NEXT:    add z1.s, z1.s, z2.s
+; STREAMING-SVE-NEXT:    add z0.s, z0.s, z3.s
 ; STREAMING-SVE-NEXT:    add z0.s, z1.s, z0.s
 ; STREAMING-SVE-NEXT:    uaddv d0, p0, z0.s
 ; STREAMING-SVE-NEXT:    fmov w0, s0
diff --git a/llvm/test/CodeGen/AArch64/sve-vector-splice.ll b/llvm/test/CodeGen/AArch64/sve-vector-splice.ll
new file mode 100644
index 0000000000000..5d2a1251ad138
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-vector-splice.ll
@@ -0,0 +1,253 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mattr=+sve  -verify-machineinstrs < %s | FileCheck %s --check-prefixes=SVE
+; RUN: llc -mattr=+sve2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=SVE2
+
+target triple = "aarch64-unknown-linux-gnu"
+
+; Test llvm.vector.splice patterns.
+; Note that this test is similar to named-vector-shuffles-sve.ll, but it focuses
+; on testing all supported types with a positive splice index.
+
+
+; i8 elements
+define <vscale x 16 x i8> @splice_nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; SVE-LABEL: splice_nxv16i8:
+; SVE:       // %bb.0:
+; SVE-NEXT:    ext z0.b, z0.b, z1.b, #1
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: splice_nxv16i8:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; SVE2-NEXT:    ext z0.b, { z0.b, z1.b }, #1
+; SVE2-NEXT:    ret
+  %res = call <vscale x 16 x i8> @llvm.vector.splice.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 1)
+  ret <vscale x 16 x i8> %res
+}
+
+; i16 elements
+define <vscale x 8 x i16> @splice_nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; SVE-LABEL: splice_nxv8i16:
+; SVE:       // %bb.0:
+; SVE-NEXT:    ext z0.b, z0.b, z1.b, #2
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: splice_nxv8i16:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; SVE2-NEXT:    ext z0.b, { z0.b, z1.b }, #2
+; SVE2-NEXT:    ret
+  %res = call <vscale x 8 x i16> @llvm.vector.splice.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, i32 1)
+  ret <vscale x 8 x i16> %res
+}
+
+; bf16 elements
+
+define <vscale x 8 x bfloat> @splice_nxv8bfloat(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) {
+; SVE-LABEL: splice_nxv8bfloat:
+; SVE:       // %bb.0:
+; SVE-NEXT:    ext z0.b, z0.b, z1.b, #2
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: splice_nxv8bfloat:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; SVE2-NEXT:    ext z0.b, { z0.b, z1.b }, #2
+; SVE2-NEXT:    ret
+  %res = call <vscale x 8 x bfloat> @llvm.vector.splice.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b, i32 1)
+  ret <vscale x 8 x bfloat> %res
+}
+
+define <vscale x 4 x bfloat> @splice_nxv4bfloat(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) {
+; SVE-LABEL: splice_nxv4bfloat:
+; SVE:       // %bb.0:
+; SVE-NEXT:    ext z0.b, z0.b, z1.b, #4
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: splice_nxv4bfloat:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; SVE2-NEXT:    ext z0.b, { z0.b, z1.b }, #4
+; SVE2-NEXT:    ret
+  %res = call <vscale x 4 x bfloat> @llvm.vector.splice.nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b, i32 1)
+  ret <vscale x 4 x bfloat> %res
+}
+
+define <vscale x 2 x bfloat> @splice_nxv2bfloat(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) {
+; SVE-LABEL: splice_nxv2bfloat:
+; SVE:       // %bb.0:
+; SVE-NEXT:    ext z0.b, z0.b, z1.b, #8
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: splice_nxv2bfloat:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; SVE2-NEXT:    ext z0.b, { z0.b, z1.b }, #8
+; SVE2-NEXT:    ret
+  %res = call <vscale x 2 x bfloat> @llvm.vector.splice.nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b, i32 1)
+  ret <vscale x 2 x bfloat> %res
+}
+
+; f16 elements
+
+define <vscale x 8 x half> @splice_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
+; SVE-LABEL: splice_nxv8f16:
+; SVE:       // %bb.0:
+; SVE-NEXT:    ext z0.b, z0.b, z1.b, #2
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: splice_nxv8f16:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; SVE2-NEXT:    ext z0.b, { z0.b, z1.b }, #2
+; SVE2-NEXT:    ret
+  %res = call <vscale x 8 x half> @llvm.vector.splice.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, i32 1)
+  ret <vscale x 8 x half> %res
+}
+
+define <vscale x 4 x half> @splice_nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
+; SVE-LABEL: splice_nxv4f16:
+; SVE:       // %bb.0:
+; SVE-NEXT:    ext z0.b, z0.b, z1.b, #4
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: splice_nxv4f16:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; SVE2-NEXT:    ext z0.b, { z0.b, z1.b }, #4
+; SVE2-NEXT:    ret
+  %res = call <vscale x 4 x half> @llvm.vector.splice.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b, i32 1)
+  ret <vscale x 4 x half> %res
+}
+
+define <vscale x 2 x half> @splice_nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
+; SVE-LABEL: splice_nxv2f16:
+; SVE:       // %bb.0:
+; SVE-NEXT:    ext z0.b, z0.b, z1.b, #8
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: splice_nxv2f16:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; SVE2-NEXT:    ext z0.b, { z0.b, z1.b }, #8
+; SVE2-NEXT:    ret
+  %res = call <vscale x 2 x half> @llvm.vector.splice.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b, i32 1)
+  ret <vscale x 2 x half> %res
+}
+
+; i32 elements
+define <vscale x 4 x i32> @splice_nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; SVE-LABEL: splice_nxv4i32:
+; SVE:       // %bb.0:
+; SVE-NEXT:    ext z0.b, z0.b, z1.b, #4
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: splice_nxv4i32:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; SVE2-NEXT:    ext z0.b, { z0.b, z1.b }, #4
+; SVE2-NEXT:    ret
+  %res = call <vscale x 4 x i32> @llvm.vector.splice.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i32 1)
+  ret <vscale x 4 x i32> %res
+}
+
+; f32 elements
+
+define <vscale x 4 x float> @splice_nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
+; SVE-LABEL: splice_nxv4f32:
+; SVE:       // %bb.0:
+; SVE-NEXT:    ext z0.b, z0.b, z1.b, #4
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: splice_nxv4f32:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; SVE2-NEXT:    ext z0.b, { z0.b, z1.b }, #4
+; SVE2-NEXT:    ret
+  %res = call <vscale x 4 x float> @llvm.vector.splice.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, i32 1)
+  ret <vscale x 4 x float> %res
+}
+
+define <vscale x 2 x float> @splice_nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
+; SVE-LABEL: splice_nxv2f32:
+; SVE:       // %bb.0:
+; SVE-NEXT:    ext z0.b, z0.b, z1.b, #8
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: splice_nxv2f32:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; SVE2-NEXT:    ext z0.b, { z0.b, z1.b }, #8
+; SVE2-NEXT:    ret
+  %res = call <vscale x 2 x float> @llvm.vector.splice.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b, i32 1)
+  ret <vscale x 2 x float> %res
+}
+
+; i64 elements
+define <vscale x 2 x i64> @splice_nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; SVE-LABEL: splice_nxv2i64:
+; SVE:       // %bb.0:
+; SVE-NEXT:    ext z0.b, z0.b, z1.b, #8
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: splice_nxv2i64:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; SVE2-NEXT:    ext z0.b, { z0.b, z1.b }, #8
+; SVE2-NEXT:    ret
+  %res = call <vscale x 2 x i64> @llvm.vector.splice.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, i32 1)
+  ret <vscale x 2 x i64> %res
+}
+
+; f64 elements
+define <vscale x 2 x double> @splice_nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
+; SVE-LABEL: splice_nxv2f64:
+; SVE:       // %bb.0:
+; SVE-NEXT:    ext z0.b, z0.b, z1.b, #8
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: splice_nxv2f64:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; SVE2-NEXT:    ext z0.b, { z0.b, z1.b }, #8
+; SVE2-NEXT:    ret
+  %res = call <vscale x 2 x double> @llvm.vector.splice.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, i32 1)
+  ret <vscale x 2 x double> %res
+}
+
+declare <vscale x 2 x i1> @llvm.vector.splice.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>, i32)
+declare <vscale x 4 x i1> @llvm.vector.splice.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>, i32)
+declare <vscale x 8 x i1> @llvm.vector.splice.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>, i32)
+declare <vscale x 16 x i1> @llvm.vector.splice.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, i32)
+
+declare <vscale x 2 x i8> @llvm.vector.splice.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>, i32)
+declare <vscale x 16 x i8> @llvm.vector.splice.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, i32)
+declare <vscale x 8 x i16> @llvm.vector.splice.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
+declare <vscale x 4 x i32> @llvm.vector.splice.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
+declare <vscale x 8 x i32> @llvm.vector.splice.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>, i32)
+declare <vscale x 2 x i64> @llvm.vector.splice.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32)
+
+declare <vscale x 2 x half> @llvm.vector.splice.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>, i32)
+declare <vscale x 4 x half> @llvm.vector.splice.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>, i32)
+declare <vscale x 8 x half> @llvm.vector.splice.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, i32)
+declare <vscale x 2 x float> @llvm.vector.splice.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, i32)
+declare <vscale x 4 x float> @llvm.vector.splice.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, i32)
+declare <vscale x 16 x float> @llvm.vector.splice.nxv16f32(<vscale x 16 x float>, <vscale x 16 x float>, i32)
+declare <vscale x 2 x double> @llvm.vector.splice.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, i32)
+
+declare <vscale x 2 x bfloat> @llvm.vector.splice.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x bfloat>, i32)
+declare <vscale x 4 x bfloat> @llvm.vector.splice.nxv4bf16(<vscale x 4 x bfloat>, <vscale x 4 x bfloat>, i32)
+declare <vscale x 8 x bfloat> @llvm.vector.splice.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, i32)
diff --git a/llvm/test/CodeGen/AArch64/sve2-fixed-length-extract-subvector.ll b/llvm/test/CodeGen/AArch64/sve2-fixed-length-extract-subvector.ll
index b96fad8239190..6fd3aff0abda2 100644
--- a/llvm/test/CodeGen/AArch64/sve2-fixed-length-extract-subvector.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-fixed-length-extract-subvector.ll
@@ -52,9 +52,8 @@ define void @extract_v4i64_halves(ptr %in, ptr %out, ptr %out2) vscale_range(2,2
 ; CHECK-LABEL: extract_v4i64_halves:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ldr z0, [x0]
-; CHECK-NEXT:    mov z1.d, z0.d
-; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #16
-; CHECK-NEXT:    str q1, [x1]
+; CHECK-NEXT:    ext z2.b, { z0.b, z1.b }, #16
+; CHECK-NEXT:    str q2, [x1]
 ; CHECK-NEXT:    str q0, [x2]
 ; CHECK-NEXT:    ret
 entry:
@@ -70,9 +69,8 @@ define void @extract_v4double_halves(ptr %in, ptr %out, ptr %out2) vscale_range(
 ; CHECK-LABEL: extract_v4double_halves:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ldr z0, [x0]
-; CHECK-NEXT:    mov z1.d, z0.d
-; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #16
-; CHECK-NEXT:    str q1, [x1]
+; CHECK-NEXT:    ext z2.b, { z0.b, z1.b }, #16
+; CHECK-NEXT:    str q2, [x1]
 ; CHECK-NEXT:    str q0, [x2]
 ; CHECK-NEXT:    ret
 entry:
@@ -88,9 +86,8 @@ define void @extract_v8i32_halves(ptr %in, ptr %out, ptr %out2) vscale_range(2,2
 ; CHECK-LABEL: extract_v8i32_halves:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ldr z0, [x0]
-; CHECK-NEXT:    mov z1.d, z0.d
-; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #16
-; CHECK-NEXT:    str q1, [x1]
+; CHECK-NEXT:    ext z2.b, { z0.b, z1.b }, #16
+; CHECK-NEXT:    str q2, [x1]
 ; CHECK-NEXT:    str q0, [x2]
 ; CHECK-NEXT:    ret
 entry:
@@ -110,9 +107,8 @@ define void @extract_v8i32_halves_intrinsic(ptr %in, ptr %out, ptr %out2) vscale
 ; CHECK-LABEL: extract_v8i32_halves_intrinsic:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ldr z0, [x0]
-; CHECK-NEXT:    mov z1.d, z0.d
-; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #16
-; CHECK-NEXT:    str q1, [x1]
+; CHECK-NEXT:    ext z2.b, { z0.b, z1.b }, #16
+; CHECK-NEXT:    str q2, [x1]
 ; CHECK-NEXT:    str q0, [x2]
 ; CHECK-NEXT:    ret
 entry:
@@ -128,9 +124,8 @@ define void @extract_v8float_halves(ptr %in, ptr %out, ptr %out2) vscale_range(2
 ; CHECK-LABEL: extract_v8float_halves:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ldr z0, [x0]
-; CHECK-NEXT:    mov z1.d, z0.d
-; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #16
-; CHECK-NEXT:    str q1, [x1]
+; CHECK-NEXT:    ext z2.b, { z0.b, z1.b }, #16
+; CHECK-NEXT:    str q2, [x1]
 ; CHECK-NEXT:    str q0, [x2]
 ; CHECK-NEXT:    ret
 entry:
@@ -146,9 +141,8 @@ define void @extract_v8i32_half_unaligned(<8 x i32> %unused, ptr %in, ptr %out)
 ; CHECK-LABEL: extract_v8i32_half_unaligned:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ldr z0, [x0]
-; CHECK-NEXT:    mov z1.d, z0.d
-; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #16
-; CHECK-NEXT:    ext v0.16b, v0.16b, v1.16b, #8
+; CHECK-NEXT:    ext z2.b, { z0.b, z1.b }, #16
+; CHECK-NEXT:    ext v0.16b, v0.16b, v2.16b, #8
 ; CHECK-NEXT:    str q0, [x1]
 ; CHECK-NEXT:    ret
 entry:
@@ -162,15 +156,13 @@ define void @extract_v8i32_quarters(ptr %in, ptr %out, ptr %out2, ptr %out3, ptr
 ; CHECK-LABEL: extract_v8i32_quarters:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ldr z0, [x0]
-; CHECK-NEXT:    mov z1.d, z0.d
-; CHECK-NEXT:    mov z2.d, z0.d
-; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #16
-; CHECK-NEXT:    ext z2.b, z2.b, z0.b, #24
-; CHECK-NEXT:    str d1, [x1]
-; CHECK-NEXT:    str d2, [x2]
+; CHECK-NEXT:    ext z2.b, { z0.b, z1.b }, #16
+; CHECK-NEXT:    ext z3.b, { z0.b, z1.b }, #24
+; CHECK-NEXT:    ext z4.b, { z0.b, z1.b }, #8
+; CHECK-NEXT:    str d2, [x1]
+; CHECK-NEXT:    str d3, [x2]
 ; CHECK-NEXT:    str d0, [x3]
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    str d0, [x4]
+; CHECK-NEXT:    str d4, [x4]
 ; CHECK-NEXT:    ret
 entry:
   %b = load <8 x i32>, ptr %in
@@ -189,9 +181,8 @@ define void @extract_v16i16_halves(ptr %in, ptr %out, ptr %out2) vscale_range(2,
 ; CHECK-LABEL: extract_v16i16_halves:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ldr z0, [x0]
-; CHECK-NEXT:    mov z1.d, z0.d
-; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #16
-; CHECK-NEXT:    str q1, [x1]
+; CHECK-NEXT:    ext z2.b, { z0.b, z1.b }, #16
+; CHECK-NEXT:    str q2, [x1]
 ; CHECK-NEXT:    str q0, [x2]
 ; CHECK-NEXT:    ret
 entry:
@@ -223,9 +214,8 @@ define void @extract_v16half_halves(ptr %in, ptr %out, ptr %out2) vscale_range(2
 ; CHECK-LABEL: extract_v16half_halves:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ldr z0, [x0]
-; CHECK-NEXT:    mov z1.d, z0.d
-; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #16
-; CHECK-NEXT:    str q1, [x1]
+; CHECK-NEXT:    ext z2.b, { z0.b, z1.b }, #16
+; CHECK-NEXT:    str q2, [x1]
 ; CHECK-NEXT:    str q0, [x2]
 ; CHECK-NEXT:    ret
 entry:
@@ -241,9 +231,8 @@ define void @extract_v32i8_halves(ptr %in, ptr %out, ptr %out2) vscale_range(2,2
 ; CHECK-LABEL: extract_v32i8_halves:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ldr z0, [x0]
-; CHECK-NEXT:    mov z1.d, z0.d
-; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #16
-; CHECK-NEXT:    str q1, [x1]
+; CHECK-NEXT:    ext z2.b, { z0.b, z1.b }, #16
+; CHECK-NEXT:    str q2, [x1]
 ; CHECK-NEXT:    str q0, [x2]
 ; CHECK-NEXT:    ret
 entry:
@@ -264,9 +253,8 @@ define void @extract_v8i64_halves(ptr %in, ptr %out, ptr %out2) vscale_range(4,4
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ldr z0, [x0]
 ; CHECK-NEXT:    ptrue p0.d, vl4
-; CHECK-NEXT:    mov z1.d, z0.d
-; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #32
-; CHECK-NEXT:    st1d { z1.d }, p0, [x1]
+; CHECK-NEXT:    ext z2.b, { z0.b, z1.b }, #32
+; CHECK-NEXT:    st1d { z2.d }, p0, [x1]
 ; CHECK-NEXT:    st1d { z0.d }, p0, [x2]
 ; CHECK-NEXT:    ret
 entry:
@@ -283,9 +271,8 @@ define void @extract_v16i32_halves(ptr %in, ptr %out, ptr %out2) vscale_range(4,
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ldr z0, [x0]
 ; CHECK-NEXT:    ptrue p0.s, vl8
-; CHECK-NEXT:    mov z1.d, z0.d
-; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #32
-; CHECK-NEXT:    st1w { z1.s }, p0, [x1]
+; CHECK-NEXT:    ext z2.b, { z0.b, z1.b }, #32
+; CHECK-NEXT:    st1w { z2.s }, p0, [x1]
 ; CHECK-NEXT:    st1w { z0.s }, p0, [x2]
 ; CHECK-NEXT:    ret
 entry:
@@ -302,9 +289,8 @@ define void @extract_v32i16_halves(ptr %in, ptr %out, ptr %out2) vscale_range(4,
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ldr z0, [x0]
 ; CHECK-NEXT:    ptrue p0.h, vl16
-; CHECK-NEXT:    mov z1.d, z0.d
-; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #32
-; CHECK-NEXT:    st1h { z1.h }, p0, [x1]
+; CHECK-NEXT:    ext z2.b, { z0.b, z1.b }, #32
+; CHECK-NEXT:    st1h { z2.h }, p0, [x1]
 ; CHECK-NEXT:    st1h { z0.h }, p0, [x2]
 ; CHECK-NEXT:    ret
 entry:
@@ -322,9 +308,8 @@ define void @extract_v64i8_halves(ptr %in, ptr %out, ptr %out2) vscale_range(4,4
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ldr z0, [x0]
 ; CHECK-NEXT:    ptrue p0.b, vl32
-; CHECK-NEXT:    mov z1.d, z0.d
-; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #32
-; CHECK-NEXT:    st1b { z1.b }, p0, [x1]
+; CHECK-NEXT:    ext z2.b, { z0.b, z1.b }, #32
+; CHECK-NEXT:    st1b { z2.b }, p0, [x1]
 ; CHECK-NEXT:    st1b { z0.b }, p0, [x2]
 ; CHECK-NEXT:    ret
 entry:



More information about the llvm-branch-commits mailing list